library(tidyverse)
library(GenomicRanges)
library(edgeR)
library(scales)
library(extraChIPs)
library(here)
library(tadar)
library(kableExtra)
library(magrittr)
library(pander)
library(viridis)
library(reactable)
library(ggpubr)
theme_set(theme_bw())
lb_reactable <- function(
        tbl, highlight = TRUE, striped = TRUE, compact = TRUE,
        wrap = FALSE, resizable = TRUE, searchable = TRUE,
        style = list(fontFamily = "Calibri, sans-serif"), ...
){
    reactable(
        tbl,
        highlight = highlight, striped = striped, compact = compact,
        wrap = wrap, resizable = TRUE, searchable = TRUE,
        style = style, ...
    )
}
tooltip <- function(value, tooltip) {
    tags$abbr(
        style = "text-decoration: underline; text-decoration-style: solid; cursor: help",
        title = tooltip,
        value
    )
}
react_format <- function(format, digits = 2){
    function(val, ind, col_name){
        formatC(val, digits = digits, format = format)
    }
}

Introduction

What is DAR?

  • Differential Allelic Representation
  • DAR describes an unequal distribution of polymorphic loci between two experimental sample groups (e.g. Mutant vs WT)

  • It’s extent is quantified by the DAR metric, which represents the difference in genetic sequence at a locus
  • A DAR metric is calculated at all suitable loci along the genome
  • A value between 0 and 1 inclusive, where 0 represent complete identity and 1 represents complete diversity

How does DAR affect RNA-seq analysis?

  • When a locus with high DAR is also an eQTL, a gene’s expression level is observed as different between the experimental groups
  • This results in the gene being classified as differentially expressed
  • However, the differential expression is in fact an artefact of DAR, and not due to the experimental condition being tested

Strategy

The Beta distribution

  • Normally, under the null hypothesis p-values follow a uniform distribution
  • The uniform distribution is a special case beta distribution
  • The beta distribution’s shape is controlled by the beta function:

\[B(\alpha,\beta)=\int_{0}^{1}t^{\alpha-1}(1-t)^{\beta-1}dt\]

  • When we set \(\alpha\) (shape1) and \(\beta\) (shape2) equal to 1, we see the uniform distribution:
rbeta(10e4, shape1 = 1, shape2 = 1) %>%
  as_tibble() %>%
  ggplot(aes(value)) +
  geom_histogram(
    colour = "black", fill = "grey50", bins = 50, boundary = 0
  ) +
  labs(x = "p")

  • Changing \(\alpha\) and \(\beta\) to different values skews the distribution:
tibble(
  `alpha = 0.5` = rbeta(10e4, shape1 = 0.5, shape2 = 1),
  `alpha = 0.8` = rbeta(10e4, shape1 = 0.8, shape2 = 1),
  `beta = 0.5` = rbeta(10e4, shape1 = 1, shape2 = 0.5),
  `beta = 0.8` = rbeta(10e4, shape1 = 1, shape2 = 0.8)
) %>%
  pivot_longer(cols = everything()) %>%
  ggplot(aes(value)) +
  geom_histogram(
    colour = "black", fill = "grey50", bins = 50, boundary = 0
  ) +
  labs(x = "p") +
  facet_wrap(~name, nrow = 2)

How can we use the DAR metric to improve our interpretations?

  • We propose that in the presence of DAR, the p-value distribution under the null hypothesis is right-skewed
  • Similarly to setting \(\alpha\) to a value less than 1
  • If we can estimate the value of \(\alpha\) at any DAR value, we can use beta probability distribution function to correct our p-values from differential expression testing of genes in regions of high DAR.

Dataset A (zebrafish)

Sample structure

  • Q96K97del/+ (\(n = 8\), psen1 gene, Alzheimer’s disease model)
  • A603fs/+ (\(n = 8\), naglu gene, MPS IIIB model)
  • WT (\(n = 8\))

Load data

dar_A <- read_rds(here("data/dar/dar_fixed_A.Rds")) %>%
  sortSeqlevels() %>%
  endoapply(sort)
gene_dar_A <- read_rds("data/dar/gene_dar_fixed_A.Rds") %>%
  sortSeqlevels() %>%
  endoapply(sort)
dge_A <- read_rds("data/de/dgeList_psen1_naglu.Rds")
dge_A$samples <- dge_A$samples %>%
  mutate(
    group = case_when(
      genotype == "EOfAD-like" ~ "eofad",
      genotype == "MPS-IIIB" ~ "mps",
      genotype == "WT" ~ "wt",
    ),
    group = factor(group, levels = c("wt", "eofad", "mps"))
  )
dge_A <- DGEList(
  counts = dge_A$counts,
  samples = dge_A$samples,
  genes = dge_A$genes,
)
dge_A <- dge_A %>%
  estimateDisp(design = model.matrix(~0 + group, .$samples))
colnames(dge_A$design) <- str_remove_all(colnames(dge_A$design), "^group")
contrasts_A <- makeContrasts(
  eofad = eofad - wt,
  mps = mps - wt,
  levels = unique(dge_A$samples$group)
)
## Rerun the analysis using glmQL-Fits
res_A <- lapply(colnames(contrasts_A), function(x){
  dge_A %>%
    glmQLFit() %>%
    glmQLFTest(contrast = contrasts_A[,x]) %>%
    # glmQLFTest(coef = "groupmps") %>%
    topTags(n = Inf) %>%
    .[["table"]] %>%
    makeGRangesFromDataFrame(keep.extra.columns = TRUE) %>%
    sortSeqlevels() %>%
    sort()
}) %>%
  set_names(colnames(contrasts_A))

Exploration

lapply(names(res_A), function(x){
  res_A[[x]] %>%
    mcols() %>%
    as_tibble() %>%
    dplyr::select(
      starts_with("gene"), starts_with("log"), PValue, FDR
    ) %>%
    left_join(
      mcols(gene_dar_A[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
    ) %>%
    ggplot(aes(dar, -log10(PValue))) +
    geom_point() +
    labs(title = x)
})

lapply(names(res_A), function(x){
  res_A[[x]] %>%
    mcols() %>%
    as_tibble() %>%
    dplyr::select(
      starts_with("gene"), starts_with("log"), PValue, FDR
    ) %>%
    left_join(
      mcols(gene_dar_A[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
    ) %>%
    mutate(
      dar_q = cut(
        dar,
        breaks = quantile(dar, probs = seq(0, 1, length.out = 21), na.rm = TRUE),
        # breaks = seq(0, 1, length.out = 7),
        include.lowest = TRUE
      ) %>%
        fct_na_value_to_level(levels(.)[[1]])
    ) %>%
    ggplot(aes(PValue, after_stat(density))) +
    geom_histogram(fill = "grey70", colour = "black", binwidth = 0.01) +
    # geom_line(
    #   ## Add 1 - dar^2 in red
    #   aes(y = y, colour = f),
    #   data = . %>%
    #     pull(dar_q) %>%
    #     levels() %>%
    #     sapply(
    #       \(x) {
    #         min_dar = str_replace_all(x, "^[^0-9]([0-9\\.]+),.+", "\\1") %>% as.numeric()
    #         tibble(
    #           PValue = seq(0.01, 1, by = 0.01),
    #           `1 - DAR` = dbeta(PValue, 1 - min_dar, 1),
    #           `1 - DAR^2` = dbeta(PValue, 1 - min_dar^2, 1),
    #           `sqrt(1 - DAR)` = dbeta(PValue, sqrt(1 - min_dar), 1),
    #         )
    #       },
    #       simplify = FALSE
    #     ) %>%
    #     bind_rows(.id = "dar_q") %>%
    #     pivot_longer(
    #       cols = contains("- DAR"), names_to = "f", values_to = "y"
    #     ) %>%
    #     mutate(dar_q = fct_inorder(dar_q)),
    # ) +
    geom_label(
      aes(x = 1, y = 0, label = label),
      data = . %>%
        summarise(n = dplyr::n(), .by = dar_q) %>%
        mutate(label = paste("n =", comma(n))),
      hjust = 1, vjust = 0, alpha = 0.7
    ) +
    facet_wrap(~dar_q, scales = "free_y") +
    labs(title = x)
})

beta_fits_A <- sapply(names(res_A), simplify = FALSE, function(x){
  res_A[[x]] %>%
    as_tibble() %>%
    dplyr::select(
      range, starts_with("gene"), starts_with("log"), PValue, FDR
    ) %>%
    left_join(
      mcols(gene_dar_A[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
    ) %>%
    dplyr::filter(!is.na(dar)) %>%
    # mutate(dar_q = cut(dar, seq(0, 1, length.out = 5))) %>%
    mutate(
      dar_q = cut(
        dar,
        unique(quantile(dar, probs = seq(0, 1, length.out = 26))),
        include.lowest = TRUE
      )
    ) %>%
    split(.$dar_q) %>%
    lapply(
      \(x) {
        tryCatch({  # !!
          fit <- MASS::fitdistr(x$PValue, "beta", list(shape1 = 1, shape2 = 1))
          tibble(
            dar = median(x$dar),
            shape1 = fit$estimate["shape1"],
            se1 = fit$sd["shape1"],
            shape2 = fit$estimate["shape2"],
            se2 = fit$sd["shape2"]
          )
        }, error = \(e){
          tibble(
            dar = median(x$dar), shape1 = NA, se1 = NA, shape2 = NA, se2 = NA
          )
        })
      }
    ) %>%
    bind_rows() %>%
    mutate(shape1_norm = shape1 / max(shape1, na.rm = TRUE))
})
sapply(beta_fits_A, simplify = FALSE, function(x){
  as.data.frame(x)
})
## $eofad
##           dar    shape1        se1    shape2        se2 shape1_norm
## 1  0.00000000 1.3642048 0.04688397 0.9997116 0.03245841   0.8854227
## 2  0.01047562 1.4269500 0.06963735 1.0240268 0.04709860   0.9261467
## 3  0.01785714 1.3744139 0.06697831 0.9872775 0.04528154   0.8920488
## 4  0.02698888 1.3864142 0.06459644 0.9988075 0.04384092   0.8998374
## 5  0.03569551 1.3813918 0.07025759 1.0669902 0.05183133   0.8965777
## 6  0.04300952 1.4297747 0.06944548 1.0746444 0.04964836   0.9279801
## 7  0.04990783 1.3573061 0.06584545 1.0210368 0.04703406   0.8809451
## 8  0.05582194 1.5407385 0.07571572 1.0321870 0.04731769   1.0000000
## 9  0.06250000 1.4773843 0.06494681 1.0856678 0.04525427   0.9588806
## 10 0.06500510 1.4080831 0.07833010 0.9796134 0.05100568   0.9139014
## 11 0.07029937 1.3803428 0.06667374 1.0966264 0.05086649   0.8958969
## 12 0.07638889 1.3718891 0.06624127 1.0842312 0.05021382   0.8904101
## 13 0.08333333 1.2587474 0.06069451 0.9926840 0.04577307   0.8169766
## 14 0.09036797 1.2739664 0.06123775 1.0143436 0.04673370   0.8268544
## 15 0.09777917 1.1628298 0.05594791 0.9436833 0.04357940   0.7547224
## 16 0.10680465 1.2222256 0.05849145 1.0250134 0.04745795   0.7932726
## 17 0.11562995 1.1718144 0.05603150 1.0071480 0.04678010   0.7605537
## 18 0.12500000 1.3053243 0.06300268 1.0287082 0.04752663   0.8472069
## 19 0.13448338 1.1103555 0.05274494 0.9940575 0.04620816   0.7206645
## 20 0.14929218 1.0873698 0.05160685 0.9753787 0.04530417   0.7057459
## 21 0.16928166 1.0017117 0.04699863 0.9928652 0.04650001   0.6501504
## 22 0.18760982 1.0236153 0.04800416 1.0322638 0.04849069   0.6643667
## 23 0.22321429 0.8427165 0.03752271 1.0526388 0.04910974   0.5469562
## 24 0.29940476 0.6808290 0.03141921 0.9393957 0.04668868   0.4418848
## 
## $mps
##           dar    shape1        se1    shape2        se2 shape1_norm
## 1  0.00000000 0.9737913 0.03222091 0.9790729 0.03243162   1.0000000
## 2  0.01057899 0.9114516 0.04240630 0.9455825 0.04433756   0.9359825
## 3  0.01785714 0.9612079 0.04463266 1.0653909 0.05050219   0.9870779
## 4  0.02709042 0.9320072 0.04228929 0.9796237 0.04491253   0.9570913
## 5  0.03621032 0.8894145 0.04259590 0.8772056 0.04188501   0.9133523
## 6  0.04305923 0.9262845 0.04325805 0.9407186 0.04407552   0.9512146
## 7  0.04996135 0.9577556 0.04470216 0.9893267 0.04648444   0.9835327
## 8  0.05605103 0.9689856 0.04508065 1.0423201 0.04921040   0.9950650
## 9  0.06250000 0.9255260 0.04058723 1.0118108 0.04519436   0.9504357
## 10 0.06547619 0.9118877 0.04519804 0.9486099 0.04741189   0.9364303
## 11 0.07132959 0.9181072 0.04277084 0.9487388 0.04450463   0.9428172
## 12 0.07775160 0.8894110 0.04112852 0.9826578 0.04640470   0.9133487
## 13 0.08488414 0.9475253 0.04410841 1.0060724 0.04741332   0.9730271
## 14 0.09261998 0.9501541 0.04411442 1.0376180 0.04904419   0.9757266
## 15 0.10033682 0.9101634 0.04221283 0.9897001 0.04671128   0.9346596
## 16 0.10927871 0.9666169 0.04497515 1.0109044 0.04746661   0.9926325
## 17 0.11845956 0.8551450 0.03943612 0.9879780 0.04698346   0.8781605
## 18 0.12579597 0.8640545 0.03962850 1.0476678 0.05001984   0.8873098
## 19 0.13745518 0.8729122 0.04019928 1.0154643 0.04827190   0.8964059
## 20 0.15138535 0.8132749 0.03733955 0.9317239 0.04407398   0.8351634
## 21 0.17013889 0.7993177 0.03646375 0.9948921 0.04759025   0.8208306
## 22 0.18839826 0.7952783 0.03618391 1.0146055 0.04865511   0.8166825
## 23 0.22576681 0.7004583 0.03052830 0.9581323 0.04484193   0.7193105
## 24 0.31307169 0.4303412 0.01905719 0.7966763 0.04157581   0.4419234

EOfAD

plotly::ggplotly({
  plotDarECDF(dar = dar_A$eofad, dar_val = "origin")
})

Before normalisation of alpha

# lm(shape1 ~ dar + I(dar^2), data = beta_fits_A$eofad) %>%
lm(shape1 ~ dar, data = beta_fits_A$eofad) %>%
  step() %>%
  summary()
## Start:  AIC=-119.88
## shape1 ~ dar
## 
##        Df Sum of Sq     RSS      AIC
## <none>              0.13759 -119.877
## - dar   1   0.86183 0.99942  -74.287
## 
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_A$eofad)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.14616 -0.05096 -0.01967  0.04179  0.18130 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.51037    0.02732   55.29  < 2e-16 ***
## dar         -2.70382    0.23033  -11.74 6.06e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07908 on 22 degrees of freedom
## Multiple R-squared:  0.8623, Adjusted R-squared:  0.8561 
## F-statistic: 137.8 on 1 and 22 DF,  p-value: 6.057e-11
lm_A_eofad <- function(dar){
  1.53110 - 2.82662 * dar
}
beta_fits_A$eofad %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_A_eofad(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_A_eofad <- res_A$eofad %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_A$eofad) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_A_eofad(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
  • No genes were DE
res_A$eofad %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_A$eofad) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_A_eofad(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_A$eofad) %>%
lm(shape1_norm ~ dar, data = beta_fits_A$eofad) %>%
  step() %>%
  summary()
## Start:  AIC=-140.63
## shape1_norm ~ dar
## 
##        Df Sum of Sq     RSS      AIC
## <none>              0.05796 -140.626
## - dar   1   0.36305 0.42101  -95.036
## 
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_A$eofad)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.09486 -0.03307 -0.01277  0.02712  0.11767 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.98029    0.01773   55.29  < 2e-16 ***
## dar         -1.75489    0.14949  -11.74 6.06e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05133 on 22 degrees of freedom
## Multiple R-squared:  0.8623, Adjusted R-squared:  0.8561 
## F-statistic: 137.8 on 1 and 22 DF,  p-value: 6.057e-11
lm_A_eofad_norm <- function(dar){
  1.14213 - 2.10852 * dar
}
dar_tmp <- dar_A$eofad
lm_all <- tibble(
  dataset = "A_eofad",
  intercept = 1.14213,
  slope = -2.10852,
  dar_mean = mean(dar_tmp$dar_origin),
  dar_median = median(dar_tmp$dar_origin),
  dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
  dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
)
beta_fits_A$eofad %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_A_eofad_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_A_eofad_norm <- res_A$eofad %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_A$eofad) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_A_eofad_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
  • No genes were DE
res_A$eofad %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_A$eofad) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_A_eofad_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

MPS-IIIB

plotly::ggplotly({
  plotDarECDF(dar = dar_A$mps, dar_val = "origin")
})

Before normalisation of alpha

# lm(shape1 ~ dar + I(dar^2), data = beta_fits_A$mps) %>%
lm(shape1 ~ dar, data = beta_fits_A$mps) %>%
  step() %>%
  summary()
## Start:  AIC=-137.25
## shape1 ~ dar
## 
##        Df Sum of Sq      RSS     AIC
## <none>              0.066703 -137.25
## - dar   1   0.24449 0.311195 -102.29
## 
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_A$mps)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.145768 -0.027659  0.005285  0.034085  0.105399 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.01410    0.01887   53.73  < 2e-16 ***
## dar         -1.39901    0.15579   -8.98 8.23e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05506 on 22 degrees of freedom
## Multiple R-squared:  0.7857, Adjusted R-squared:  0.7759 
## F-statistic: 80.64 on 1 and 22 DF,  p-value: 8.234e-09
lm_A_mps <- function(dar){
  1.12769 - 1.80512 * dar
}
beta_fits_A$mps %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_A_mps(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_A_mps <- res_A$mps %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_A$mps) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_A_mps(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
kable(darP_A_mps) %>%
  kable_paper() %>%
  column_spec(
    1:ncol(darP_A_mps),
    background = ifelse(darP_A_mps$isDE, "green4", "red3")
  )
range gene_id gene_name gene_biotype logFC logCPM PValue FDR dar alpha darP FDR_darP wasDE isDE
5:38215924-38248462:- ENSDARG00000060366 slc12a9 protein_coding 0.4031449 4.8904326 2.00e-07 0.0004054 0.0638934 1.0000000 0.0000002 0.0015241 TRUE TRUE
7:19909774-19923249:- ENSDARG00000056233 zgc:110591 protein_coding 0.4244109 4.3066556 2.00e-07 0.0004634 0.0608100 1.0000000 0.0000002 0.0015241 TRUE TRUE
8:47633438-47665026:+ ENSDARG00000007275 si:ch211-251b21.1 protein_coding 0.6842232 9.2881803 0.00e+00 0.0000192 0.1875000 0.7892300 0.0000002 0.0015241 TRUE TRUE
25:28714527-28768489:- ENSDARG00000061719 washc4 protein_coding 0.2736910 4.9829463 2.20e-06 0.0033698 0.0000000 1.0000000 0.0000022 0.0109519 TRUE TRUE
24:40763020-40774028:- ENSDARG00000103837 CU633479.2 protein_coding 0.9207708 3.8575249 5.00e-07 0.0010961 0.1453873 0.8652485 0.0000038 0.0153026 TRUE TRUE
8:29726620-29734951:+ ENSDARG00000024540 tspan36 protein_coding 0.5499149 5.7319949 5.90e-06 0.0065288 0.0008720 1.0000000 0.0000059 0.0195864 TRUE TRUE
24:32650227-32665283:- ENSDARG00000014488 ca2 protein_coding 0.4202880 4.2110484 2.90e-06 0.0035998 0.1354167 0.8832467 0.0000127 0.0364171 TRUE TRUE
5:37837245-37839310:+ ENSDARG00000103498 epd protein_coding 0.4902888 7.8277492 1.88e-05 0.0156429 0.0000000 1.0000000 0.0000188 0.0465635 TRUE TRUE
25:17589906-17613025:+ ENSDARG00000014303 vac14 protein_coding 0.3800054 3.9872145 2.90e-06 0.0035998 0.1570791 0.8441434 0.0000212 0.0465635 TRUE TRUE
11:16040517-16064291:+ ENSDARG00000077082 agtrap protein_coding 0.6484891 2.0941711 1.59e-05 0.0144971 0.0936756 0.9585943 0.0000251 0.0465635 TRUE TRUE
8:37030974-37043900:- ENSDARG00000008931 renbp protein_coding 0.5951639 2.5800845 1.01e-05 0.0101522 0.1153063 0.9195483 0.0000255 0.0465635 TRUE TRUE
22:22302614-22315323:+ ENSDARG00000010531 scamp4 protein_coding 0.2562861 4.7977629 3.90e-05 0.0260611 0.0326885 1.0000000 0.0000390 0.0651527 TRUE FALSE
16:14216581-14231312:+ ENSDARG00000076058 gba protein_coding 0.2575265 4.6510506 4.23e-05 0.0263202 0.0189639 1.0000000 0.0000423 0.0653022 TRUE FALSE
25:18889332-18906871:+ ENSDARG00000015732 cax2 protein_coding 0.5076029 3.9992018 1.00e-07 0.0001867 0.2924278 0.5998227 0.0000489 0.0701200 TRUE FALSE
19:798589-808265:- ENSDARG00000059354 glmp protein_coding 0.2334236 4.9059589 5.50e-05 0.0315333 0.0045249 1.0000000 0.0000550 0.0735777 TRUE FALSE
13:18073211-18122333:- ENSDARG00000062532 washc2c protein_coding 0.1868788 6.4142829 8.48e-05 0.0459874 0.0452385 1.0000000 0.0000848 0.1063458 TRUE FALSE
24:41466841-41478917:- ENSDARG00000114762 CABZ01084131.1 protein_coding 0.5351594 3.5400987 4.11e-05 0.0263202 0.1373178 0.8798149 0.0001382 0.1260880 TRUE FALSE
2:20555394-20599315:- ENSDARG00000075487 si:ch211-267e7.3 protein_coding -0.5567146 1.9374349 8.75e-05 0.0461998 0.1178659 0.9149278 0.0001937 0.1619729 TRUE FALSE
4:5848229-5850633:+ ENSDARG00000099560 lyrm5a protein_coding 0.4614141 3.0222516 4.33e-05 0.0263202 0.1579423 0.8425852 0.0002105 0.1689560 TRUE FALSE
24:40728978-40731305:- ENSDARG00000114818 CU633479.5 protein_coding 0.6986417 4.4674747 1.95e-05 0.0156429 0.2158306 0.7380898 0.0003337 0.2232535 TRUE FALSE
15:26552948-26570948:- ENSDARG00000079702 wdr81 protein_coding 0.2943020 5.9871210 2.50e-06 0.0035832 0.2921738 0.6002812 0.0004337 0.2486555 TRUE FALSE
25:18905472-18913336:- ENSDARG00000101725 parietopsin protein_coding 0.6208850 3.6948674 1.10e-06 0.0020332 0.3178795 0.5538794 0.0005044 0.2595579 TRUE FALSE
21:13785969-13796940:+ ENSDARG00000076092 slc31a2 protein_coding 0.4162170 3.0021726 2.78e-05 0.0199521 0.2258850 0.7199404 0.0005253 0.2635472 TRUE FALSE
24:33552198-33578392:- ENSDARG00000056909 LO018309.1 protein_coding -1.2242952 1.5011888 1.68e-05 0.0146180 0.2916667 0.6011967 0.0013451 0.4498877 TRUE FALSE
17:32622931-32631514:+ ENSDARG00000055120 ctsba protein_coding 0.7909985 7.2871241 6.26e-05 0.0348852 0.2630109 0.6529237 0.0018005 0.5161809 TRUE FALSE
24:39443874-39511864:+ ENSDARG00000068468 dnah3 protein_coding 0.8253023 1.2324678 2.25e-05 0.0170458 0.4293025 0.3527475 0.0229206 0.8813342 TRUE FALSE
24:37631632-37640705:- ENSDARG00000004274 zgc:112496 protein_coding 0.7992052 0.4481920 3.03e-05 0.0209476 0.4699074 0.2794507 0.0545972 0.9459566 TRUE FALSE
24:37406535-37429066:+ ENSDARG00000092499 si:ch211-183d21.1 protein_coding -0.8246218 4.9044281 0.00e+00 0.0000001 0.9370246 0.1000000 0.0717002 0.9459566 TRUE FALSE
24:36903172-36910224:- ENSDARG00000036304 dnaaf3l protein_coding 2.4162852 -0.3139137 0.00e+00 0.0000002 0.6285714 0.1000000 0.0835136 0.9459566 TRUE FALSE
24:36798558-36802891:- ENSDARG00000061638 dcakd protein_coding -0.8559429 2.6731254 0.00e+00 0.0000007 0.9479167 0.1000000 0.0999672 0.9459566 TRUE FALSE
24:36452924-36524504:+ ENSDARG00000101812 zgc:114120 protein_coding 0.7117346 1.7438925 6.90e-06 0.0073400 0.5208333 0.1875233 0.1078311 0.9459566 TRUE FALSE
24:37709191-37721739:+ ENSDARG00000045257 decr2 protein_coding -0.7645817 2.7648802 2.29e-05 0.0170458 0.5179436 0.1927396 0.1275792 0.9459566 TRUE FALSE
24:36129216-36136401:- ENSDARG00000114991 tmem14cb protein_coding -1.5800575 -0.1447143 0.00e+00 0.0000819 0.6805556 0.1000000 0.1702015 0.9459566 TRUE FALSE
24:37484661-37531540:+ ENSDARG00000079241 wdr90 protein_coding -1.2100636 2.2651867 0.00e+00 0.0000873 0.7692294 0.1000000 0.1744485 0.9459566 TRUE FALSE
24:38155830-38191962:+ ENSDARG00000071460 si:ch211-234p6.5 protein_coding -0.5346074 3.2638178 1.40e-06 0.0023490 0.5695445 0.1000000 0.2598704 0.9521641 TRUE FALSE
24:39105051-39114123:+ ENSDARG00000075463 mss51 protein_coding -0.7148683 5.0518228 3.00e-06 0.0035998 0.8121222 0.1000000 0.2808165 0.9521641 TRUE FALSE
24:39638555-39654132:+ ENSDARG00000055903 luc7l protein_coding 0.2276765 6.3022584 1.41e-05 0.0134942 0.8058615 0.1000000 0.3273303 0.9595125 TRUE FALSE
24:38098896-38099675:- ENSDARG00000093671 BX001030.1 unprocessed_pseudogene 0.9300512 0.7739344 5.47e-05 0.0315333 0.6125000 0.1000000 0.3747940 0.9595125 TRUE FALSE
res_A$mps %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_A$mps) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_A_mps(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_A$mps) %>%
lm(shape1_norm ~ dar, data = beta_fits_A$mps) %>%
  step() %>%
  summary()
## Start:  AIC=-135.98
## shape1_norm ~ dar
## 
##        Df Sum of Sq     RSS     AIC
## <none>              0.07034 -135.98
## - dar   1   0.25783 0.32817 -101.02
## 
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_A$mps)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.149691 -0.028403  0.005427  0.035002  0.108236 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.04139    0.01938   53.73  < 2e-16 ***
## dar         -1.43667    0.15999   -8.98 8.23e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05655 on 22 degrees of freedom
## Multiple R-squared:  0.7857, Adjusted R-squared:  0.7759 
## F-statistic: 80.64 on 1 and 22 DF,  p-value: 8.234e-09
lm_A_mps_norm <- function(dar){
  1.13861 - 1.82260 * dar
}
dar_tmp <- dar_A$mps
lm_all <- lm_all %>%
  rbind(tibble(
    dataset = "A_mps",
    intercept = 1.13861,
    slope = -1.82260,
    dar_mean = mean(dar_tmp$dar_origin),
    dar_median = median(dar_tmp$dar_origin),
    dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
    dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
  ))
beta_fits_A$mps %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_A_mps_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_A_mps_norm <- res_A$mps %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_A$mps) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_A_mps_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
kable(darP_A_mps_norm) %>%
  kable_paper() %>%
  column_spec(
    1:ncol(darP_A_mps_norm),
    background = ifelse(darP_A_mps_norm$isDE, "green4", "red3")
  )
range gene_id gene_name gene_biotype logFC logCPM PValue FDR dar alpha darP FDR_darP wasDE isDE
5:38215924-38248462:- ENSDARG00000060366 slc12a9 protein_coding 0.4031449 4.8904326 2.00e-07 0.0004054 0.0638934 1.0000000 0.0000002 0.0013902 TRUE TRUE
8:47633438-47665026:+ ENSDARG00000007275 si:ch211-251b21.1 protein_coding 0.6842232 9.2881803 0.00e+00 0.0000192 0.1875000 0.7968725 0.0000002 0.0013902 TRUE TRUE
7:19909774-19923249:- ENSDARG00000056233 zgc:110591 protein_coding 0.4244109 4.3066556 2.00e-07 0.0004634 0.0608100 1.0000000 0.0000002 0.0013902 TRUE TRUE
25:28714527-28768489:- ENSDARG00000061719 washc4 protein_coding 0.2736910 4.9829463 2.20e-06 0.0033698 0.0000000 1.0000000 0.0000022 0.0109519 TRUE TRUE
24:40763020-40774028:- ENSDARG00000103837 CU633479.2 protein_coding 0.9207708 3.8575249 5.00e-07 0.0010961 0.1453873 0.8736271 0.0000034 0.0135611 TRUE TRUE
8:29726620-29734951:+ ENSDARG00000024540 tspan36 protein_coding 0.5499149 5.7319949 5.90e-06 0.0065288 0.0008720 1.0000000 0.0000059 0.0195864 TRUE TRUE
24:32650227-32665283:- ENSDARG00000014488 ca2 protein_coding 0.4202880 4.2110484 2.90e-06 0.0035998 0.1354167 0.8917996 0.0000114 0.0326508 TRUE TRUE
5:37837245-37839310:+ ENSDARG00000103498 epd protein_coding 0.4902888 7.8277492 1.88e-05 0.0156429 0.0000000 1.0000000 0.0000188 0.0420309 TRUE TRUE
25:17589906-17613025:+ ENSDARG00000014303 vac14 protein_coding 0.3800054 3.9872145 2.90e-06 0.0035998 0.1570791 0.8523177 0.0000191 0.0420309 TRUE TRUE
11:16040517-16064291:+ ENSDARG00000077082 agtrap protein_coding 0.6484891 2.0941711 1.59e-05 0.0144971 0.0936756 0.9678769 0.0000227 0.0420309 TRUE TRUE
8:37030974-37043900:- ENSDARG00000008931 renbp protein_coding 0.5951639 2.5800845 1.01e-05 0.0101522 0.1153063 0.9284528 0.0000230 0.0420309 TRUE TRUE
22:22302614-22315323:+ ENSDARG00000010531 scamp4 protein_coding 0.2562861 4.7977629 3.90e-05 0.0260611 0.0326885 1.0000000 0.0000390 0.0636944 TRUE FALSE
16:14216581-14231312:+ ENSDARG00000076058 gba protein_coding 0.2575265 4.6510506 4.23e-05 0.0263202 0.0189639 1.0000000 0.0000423 0.0636944 TRUE FALSE
25:18889332-18906871:+ ENSDARG00000015732 cax2 protein_coding 0.5076029 3.9992018 1.00e-07 0.0001867 0.2924278 0.6056311 0.0000444 0.0636944 TRUE FALSE
19:798589-808265:- ENSDARG00000059354 glmp protein_coding 0.2334236 4.9059589 5.50e-05 0.0315333 0.0045249 1.0000000 0.0000550 0.0735777 TRUE FALSE
13:18073211-18122333:- ENSDARG00000062532 washc2c protein_coding 0.1868788 6.4142829 8.48e-05 0.0459874 0.0452385 1.0000000 0.0000848 0.1063458 TRUE FALSE
24:41466841-41478917:- ENSDARG00000114762 CABZ01084131.1 protein_coding 0.5351594 3.5400987 4.11e-05 0.0263202 0.1373178 0.8883346 0.0001268 0.1167662 TRUE FALSE
2:20555394-20599315:- ENSDARG00000075487 si:ch211-267e7.3 protein_coding -0.5567146 1.9374349 8.75e-05 0.0461998 0.1178659 0.9237875 0.0001783 0.1491039 TRUE FALSE
4:5848229-5850633:+ ENSDARG00000099560 lyrm5a protein_coding 0.4614141 3.0222516 4.33e-05 0.0263202 0.1579423 0.8507444 0.0001939 0.1556573 TRUE FALSE
24:40728978-40731305:- ENSDARG00000114818 CU633479.5 protein_coding 0.6986417 4.4674747 1.95e-05 0.0156429 0.2158306 0.7452371 0.0003089 0.2137254 TRUE FALSE
15:26552948-26570948:- ENSDARG00000079702 wdr81 protein_coding 0.2943020 5.9871210 2.50e-06 0.0035832 0.2921738 0.6060940 0.0004023 0.2380284 TRUE FALSE
25:18905472-18913336:- ENSDARG00000101725 parietopsin protein_coding 0.6208850 3.6948674 1.10e-06 0.0020332 0.3178795 0.5592429 0.0004687 0.2525877 TRUE FALSE
21:13785969-13796940:+ ENSDARG00000076092 slc31a2 protein_coding 0.4162170 3.0021726 2.78e-05 0.0199521 0.2258850 0.7269119 0.0004883 0.2525877 TRUE FALSE
24:33552198-33578392:- ENSDARG00000056909 LO018309.1 protein_coding -1.2242952 1.5011888 1.68e-05 0.0146180 0.2916667 0.6070183 0.0012617 0.4360714 TRUE FALSE
17:32622931-32631514:+ ENSDARG00000055120 ctsba protein_coding 0.7909985 7.2871241 6.26e-05 0.0348852 0.2630109 0.6592463 0.0016936 0.4998201 TRUE FALSE
24:39443874-39511864:+ ENSDARG00000068468 dnah3 protein_coding 0.8253023 1.2324678 2.25e-05 0.0170458 0.4293025 0.3561633 0.0220977 0.8629069 TRUE FALSE
24:37631632-37640705:- ENSDARG00000004274 zgc:112496 protein_coding 0.7992052 0.4481920 3.03e-05 0.0209476 0.4699074 0.2821568 0.0530813 0.9391996 TRUE FALSE
24:37406535-37429066:+ ENSDARG00000092499 si:ch211-183d21.1 protein_coding -0.8246218 4.9044281 0.00e+00 0.0000001 0.9370246 0.1000000 0.0717002 0.9391996 TRUE FALSE
24:36903172-36910224:- ENSDARG00000036304 dnaaf3l protein_coding 2.4162852 -0.3139137 0.00e+00 0.0000002 0.6285714 0.1000000 0.0835136 0.9391996 TRUE FALSE
24:36798558-36802891:- ENSDARG00000061638 dcakd protein_coding -0.8559429 2.6731254 0.00e+00 0.0000007 0.9479167 0.1000000 0.0999672 0.9391996 TRUE FALSE
24:36452924-36524504:+ ENSDARG00000101812 zgc:114120 protein_coding 0.7117346 1.7438925 6.90e-06 0.0073400 0.5208333 0.1893392 0.1055305 0.9391996 TRUE FALSE
24:37709191-37721739:+ ENSDARG00000045257 decr2 protein_coding -0.7645817 2.7648802 2.29e-05 0.0170458 0.5179436 0.1946060 0.1250607 0.9391996 TRUE FALSE
24:36129216-36136401:- ENSDARG00000114991 tmem14cb protein_coding -1.5800575 -0.1447143 0.00e+00 0.0000819 0.6805556 0.1000000 0.1702015 0.9391996 TRUE FALSE
24:37484661-37531540:+ ENSDARG00000079241 wdr90 protein_coding -1.2100636 2.2651867 0.00e+00 0.0000873 0.7692294 0.1000000 0.1744485 0.9391996 TRUE FALSE
24:38155830-38191962:+ ENSDARG00000071460 si:ch211-234p6.5 protein_coding -0.5346074 3.2638178 1.40e-06 0.0023490 0.5695445 0.1005581 0.2579232 0.9462021 TRUE FALSE
24:39105051-39114123:+ ENSDARG00000075463 mss51 protein_coding -0.7148683 5.0518228 3.00e-06 0.0035998 0.8121222 0.1000000 0.2808165 0.9462021 TRUE FALSE
24:39638555-39654132:+ ENSDARG00000055903 luc7l protein_coding 0.2276765 6.3022584 1.41e-05 0.0134942 0.8058615 0.1000000 0.3273303 0.9545611 TRUE FALSE
24:38098896-38099675:- ENSDARG00000093671 BX001030.1 unprocessed_pseudogene 0.9300512 0.7739344 5.47e-05 0.0315333 0.6125000 0.1000000 0.3747940 0.9558181 TRUE FALSE
res_A$mps %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_A$mps) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_A_mps_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

Dataset B (zebrafish)

Sample structure

  • V1482Afs/+ (\(n = 6\), sorl1 gene, Alzheimer’s disease model)
  • R122Pfs/+ (\(n = 4\), sorl1 gene, Alzheimer’s disease model)
  • V1482Afs/R122Pfs (\(n = 6\), sorl1 gene, transheterozygotes)
  • WT (\(n = 8\))

Load data

meta_B <- read_csv(here("data/metadata/sorl1.csv")) %>%
  as.data.frame() %>%
  dplyr::arrange(Run) %>%
  mutate(
    genotype = case_when(
      Genotype == "wild-type" ~ "WT",
      Genotype == "sorl1V1482Afs/+" ~ "V1482Afs_het",
      Genotype == "sorl1R122Pfs/+" ~ "R122Pfs_het",
      Genotype == "sorl1V1482Afs/R122Pfs" ~ "Trans",
    ),
    genotype = factor(
      genotype,
      levels = c("WT", "V1482Afs_het", "R122Pfs_het", "Trans")
    ),
    alias = c(
      paste0(rep("WT", 8), seq(1, 8)),
      paste0(rep("V1482Afs", 6), seq(1, 6)),
      paste0(rep("R122Pfs", 4), seq(1, 4)),
      paste0(rep("Trans", 6), seq(1, 6))
    )
  ) %>%
  dplyr::select(
    sample = Run, genotype, genotype2 = Genotype, alias, gender, tank = Tank
  )
dar_B <- read_rds(here("data/dar/dar_fixed_B.Rds")) %>%
  sortSeqlevels() %>%
  endoapply(sort)
gene_dar_B <- read_rds("data/dar/gene_dar_fixed_B.Rds") %>%
  sortSeqlevels() %>%
  endoapply(sort)
dge_B <- read_rds("data/de/dgeList_sorl1_cqn.Rds")
dge_B <- DGEList(
  counts = dge_B$counts,
  samples = dge_B$samples,
  genes = dge_B$genes,
  offset = dge_B$offset
)
dge_B <- dge_B %>%
  estimateDisp(design = model.matrix(~0 + genotype, .$sample))
colnames(dge_B$design) <- str_remove_all(colnames(dge_B$design), "^genotype")
contrasts_B <- makeContrasts(
  V1482Afs = V1482Afs_het - WT,
  R122Pfs = R122Pfs_het - WT,
  trans = Trans - WT,
  levels = unique(dge_B$samples$genotype)
)
## Rerun the analysis using glmQL-Fits
res_B <- lapply(colnames(contrasts_B), function(x){
  dge_B %>%
    glmQLFit() %>%
    glmQLFTest(contrast = contrasts_B[,x]) %>%
    # glmFit() %>%
    # glmLRT(contrast = contrasts_B[,x]) %>%
    topTags(n = Inf) %>%
    .[["table"]] %>%
    makeGRangesFromDataFrame(keep.extra.columns = TRUE) %>%
    sortSeqlevels() %>%
    sort()
}) %>%
  set_names(colnames(contrasts_B))

Exploration

lapply(names(res_B), function(x){
  res_B[[x]] %>%
    mcols() %>%
    as_tibble() %>%
    dplyr::select(
      starts_with("gene"), starts_with("log"), PValue, FDR
    ) %>%
    left_join(
      mcols(gene_dar_B[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
    ) %>%
    ggplot(aes(dar, -log10(PValue))) +
    geom_point() +
    labs(title = x)
})

lapply(names(res_B), function(x){
  res_B[[x]] %>%
    mcols() %>%
    as_tibble() %>%
    dplyr::select(
      starts_with("gene"), starts_with("log"), PValue, FDR
    ) %>%
    left_join(
      mcols(gene_dar_B[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
    ) %>%
    mutate(
      dar_q = cut(
        dar,
        breaks = quantile(dar, probs = seq(0, 1, length.out = 21), na.rm = TRUE),
        # breaks = seq(0, 1, length.out = 7),
        include.lowest = TRUE
      ) %>%
        fct_na_value_to_level(levels(.)[[1]])
    ) %>%
    ggplot(aes(PValue, after_stat(density))) +
    geom_histogram(fill = "grey70", colour = "black", binwidth = 0.01) +
    geom_line(
      ## Add 1 - dar^2 in red
      aes(y = y, colour = f),
      data = . %>%
        pull(dar_q) %>%
        levels() %>%
        sapply(
          \(x) {
            min_dar = str_replace_all(x, "^[^0-9]([0-9\\.]+),.+", "\\1") %>% as.numeric()
            tibble(
              PValue = seq(0.01, 1, by = 0.01),
              `1 - DAR` = dbeta(PValue, 1 - min_dar, 1),
              `1 - DAR^2` = dbeta(PValue, 1 - min_dar^2, 1),
              `sqrt(1 - DAR)` = dbeta(PValue, sqrt(1 - min_dar), 1),
            )
          },
          simplify = FALSE
        ) %>%
        bind_rows(.id = "dar_q") %>%
        pivot_longer(
          cols = contains("- DAR"), names_to = "f", values_to = "y"
        ) %>%
        mutate(dar_q = fct_inorder(dar_q)),
    ) +
    geom_label(
      aes(x = 1, y = 0, label = label),
      data = . %>%
        summarise(n = dplyr::n(), .by = dar_q) %>%
        mutate(label = paste("n =", comma(n))),
      hjust = 1, vjust = 0, alpha = 0.7
    ) +
    facet_wrap(~dar_q, scales = "free_y") +
    labs(title = x)
})

beta_fits_B <- sapply(names(res_B), simplify = FALSE, function(x){
  res_B[[x]] %>%
    as_tibble() %>%
    dplyr::select(
      range, starts_with("gene"), starts_with("log"), PValue, FDR
    ) %>%
    left_join(
      mcols(gene_dar_B[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
    ) %>%
    dplyr::filter(!is.na(dar)) %>%
    # mutate(dar_q = cut(dar, seq(0, 1, length.out = 5))) %>%
    mutate(
      dar_q = cut(
        dar,
        unique(quantile(dar, probs = seq(0, 1, length.out = 26))),
        include.lowest = TRUE
      )
    ) %>%
    split(.$dar_q) %>%
    lapply(
      \(x) {
        tryCatch({  # !!
          fit <- MASS::fitdistr(x$PValue, "beta", list(shape1 = 1, shape2 = 1))
          tibble(
            dar = median(x$dar),
            shape1 = fit$estimate["shape1"],
            se1 = fit$sd["shape1"],
            shape2 = fit$estimate["shape2"],
            se2 = fit$sd["shape2"]
          )
        }, error = \(e){
          tibble(
            dar = median(x$dar), shape1 = NA, se1 = NA, shape2 = NA, se2 = NA
          )
        })
      }
    ) %>%
    bind_rows() %>%
    mutate(shape1_norm = shape1 / max(shape1, na.rm = TRUE))
})
sapply(beta_fits_B, simplify = FALSE, function(x){
  as.data.frame(x)
})
## $V1482Afs
##           dar    shape1        se1    shape2        se2 shape1_norm
## 1  0.00000000 1.2751730 0.07099322 1.0429773 0.05595571   0.8461148
## 2  0.02500000 1.3936937 0.07785129 1.1403642 0.06152825   0.9247568
## 3  0.03971648 1.3672888 0.07680465 1.0506778 0.05629477   0.9072364
## 4  0.05000000 1.3130354 0.07361347 1.0104407 0.05397625   0.8712377
## 5  0.05984432 1.3880126 0.07492623 1.0911469 0.05645766   0.9209872
## 6  0.06619048 1.3937882 0.08155919 1.0600039 0.05908188   0.9248195
## 7  0.07287461 1.3799631 0.07714644 1.1140163 0.05998785   0.9156462
## 8  0.07924107 1.5070921 0.08583149 1.0067926 0.05339759   1.0000000
## 9  0.08427940 1.2575298 0.06953154 1.1128890 0.06018940   0.8344081
## 10 0.09067460 1.4203479 0.07951842 1.1496767 0.06207586   0.9424427
## 11 0.09736395 1.3068775 0.06997319 1.1120042 0.05784762   0.8671517
## 12 0.10359808 1.2424118 0.07147307 1.1165337 0.06299918   0.8243768
## 13 0.10937500 1.3259063 0.07354046 1.1662442 0.06325244   0.8797779
## 14 0.11607143 1.4227808 0.07953142 1.1624677 0.06278027   0.9440570
## 15 0.12404018 1.2761821 0.07092913 1.0764197 0.05800066   0.8467844
## 16 0.13133252 1.3256420 0.07405507 1.0656200 0.05723004   0.8796025
## 17 0.14045905 1.3477115 0.07516093 1.1119188 0.05993992   0.8942462
## 18 0.14898941 1.3390293 0.07454245 1.1272703 0.06087993   0.8884854
## 19 0.15969259 1.1520174 0.06318908 1.0976725 0.05966611   0.7643975
## 20 0.17085777 1.1207193 0.06142753 1.0631675 0.05768938   0.7436303
## 21 0.18569303 1.1108075 0.06087304 1.0516427 0.05702763   0.7370535
## 22 0.20121094 1.2078274 0.06624716 1.1816014 0.06455384   0.8014290
## 23 0.22330659 1.1947044 0.06567932 1.1253326 0.06119095   0.7927216
## 24 0.26420878 1.2355606 0.06792342 1.1438837 0.06201628   0.8198308
## 25 0.35729167 0.8403017 0.04487922 0.9769802 0.05389404   0.5575649
## 
## $R122Pfs
##           dar    shape1        se1    shape2        se2 shape1_norm
## 1  0.00000000 1.6787145 0.09746029 0.9905426 0.05253660   0.8204822
## 2  0.02500000 1.8614460 0.10899304 0.9400388 0.04903484   0.9097934
## 3  0.04687500 1.8396059 0.10803467 1.0486369 0.05621260   0.8991189
## 4  0.06187144 1.8234119 0.09742558 1.0052581 0.04866473   0.8912040
## 5  0.06770833 1.8194470 0.11522313 1.0163685 0.05846631   0.8892661
## 6  0.07502631 1.9315702 0.11645390 0.9962538 0.05391494   0.9440670
## 7  0.08280804 2.0460096 0.11966835 1.1084793 0.05903674   1.0000000
## 8  0.08928571 1.7448896 0.10236812 0.9403212 0.04957832   0.8528257
## 9  0.09679719 1.8904863 0.11064519 1.0385342 0.05517830   0.9239870
## 10 0.10437853 1.9057402 0.11159624 1.0322578 0.05474986   0.9314425
## 11 0.11215274 1.8691354 0.10897015 1.0587812 0.05633489   0.9135516
## 12 0.11954365 1.8411469 0.10800103 0.9825424 0.05190627   0.8998721
## 13 0.12500000 1.7973960 0.10426315 1.0718229 0.05717370   0.8784885
## 14 0.13299150 1.5969010 0.09260670 0.9435020 0.04983428   0.7804953
## 15 0.14259547 1.6762335 0.09777584 0.9535161 0.05041723   0.8192696
## 16 0.15291005 1.5496412 0.08936982 0.9723092 0.05162797   0.7573969
## 17 0.16445410 1.6950964 0.09788561 1.0663901 0.05702428   0.8284890
## 18 0.17678571 1.6190225 0.09323974 1.0341895 0.05517637   0.7913074
## 19 0.18992419 1.4388561 0.08313069 0.8768443 0.04614133   0.7032499
## 20 0.20838392 1.5563249 0.08967183 0.9945509 0.05297371   0.7606635
## 21 0.23405704 1.4337323 0.08160129 1.0200680 0.05463318   0.7007456
## 22 0.26078869 1.5051914 0.08621486 1.0090487 0.05385909   0.7356717
## 23 0.30000000 1.2954290 0.07324948 0.9674776 0.05178135   0.6331490
## 24 0.35375616 1.0526885 0.05848550 0.8932853 0.04797795   0.5145081
## 25 0.48214286 0.8205036 0.04448293 0.8205841 0.04448828   0.4010263
## 
## $trans
##           dar    shape1        se1    shape2        se2 shape1_norm
## 1  0.00000000 1.3040392 0.07306468 1.0488687 0.05647862   0.8450706
## 2  0.02678571 1.2761967 0.07205179 0.9834770 0.05287096   0.8270275
## 3  0.04166667 1.4429798 0.08179030 1.0656314 0.05727570   0.9351098
## 4  0.05305848 1.3328594 0.07505164 1.0500585 0.05662896   0.8637472
## 5  0.06250000 1.3280911 0.07472444 1.0237714 0.05492394   0.8606572
## 6  0.06944444 1.5002304 0.08551505 1.0799902 0.05817538   0.9722105
## 7  0.07619048 1.4726310 0.08371394 1.0676040 0.05738710   0.9543249
## 8  0.08333333 1.4704375 0.08342671 1.0896881 0.05870633   0.9529034
## 9  0.08918100 1.4143899 0.07988385 1.0830002 0.05836860   0.9165824
## 10 0.09577922 1.5431127 0.08775136 1.1259488 0.06073248   1.0000000
## 11 0.10182073 1.4216419 0.08033943 1.0718892 0.05764058   0.9212820
## 12 0.10819293 1.3487550 0.07615618 1.0434054 0.05624395   0.8740483
## 13 0.11479167 1.5076057 0.08625771 1.0227050 0.05466285   0.9769900
## 14 0.12267019 1.4733556 0.08315171 1.1521079 0.06237518   0.9547945
## 15 0.12951128 1.1702214 0.06585581 0.8965300 0.04784291   0.7583512
## 16 0.13824405 1.4339779 0.08129812 1.0652680 0.05732156   0.9292762
## 17 0.14757456 1.2709339 0.07137870 0.9973728 0.05352830   0.8236170
## 18 0.15749150 1.2612978 0.07056222 1.0643976 0.05772051   0.8173724
## 19 0.16828901 1.2920545 0.07283397 0.9967640 0.05353569   0.8373040
## 20 0.18092991 1.1840186 0.06615148 0.9637927 0.05174080   0.7672923
## 21 0.19488989 1.2603250 0.07083817 0.9877302 0.05302405   0.8167420
## 22 0.21232796 1.1021508 0.06129986 0.9570211 0.05175474   0.7142387
## 23 0.23822763 1.1372496 0.06272236 1.0725716 0.05850255   0.7369841
## 24 0.28149004 0.9665891 0.05280483 0.9570122 0.05217400   0.6263892
## 25 0.40000000 0.6670882 0.03525290 0.7918470 0.04361225   0.4323003

V1482Afs

plotly::ggplotly({
  plotDarECDF(dar = dar_B$V1482Afs, dar_val = "origin")
})

Before normalisation of alpha

# lm(shape1 ~ dar + I(dar^2), data = beta_fits_B$V1482Afs) %>%
lm(shape1 ~ dar, data = beta_fits_B$V1482Afs) %>%
  step() %>%
  summary()
## Start:  AIC=-118.73
## shape1 ~ dar
## 
##        Df Sum of Sq     RSS      AIC
## <none>              0.18447 -118.729
## - dar   1   0.25571 0.44017  -98.987
## 
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_B$V1482Afs)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.17147 -0.06989  0.01897  0.04931  0.16320 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.44665    0.03365  42.987  < 2e-16 ***
## dar         -1.29680    0.22966  -5.646 9.53e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08956 on 23 degrees of freedom
## Multiple R-squared:  0.5809, Adjusted R-squared:  0.5627 
## F-statistic: 31.88 on 1 and 23 DF,  p-value: 9.525e-06
lm_B_V1482Afs <- function(dar){
  # 1.46331 - 1.41616 * dar
  1.52192 - 1.64891 * dar
}
beta_fits_B$V1482Afs %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_B_V1482Afs(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_B_V1482Afs <- res_B$V1482Afs %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_B$V1482Afs) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_B_V1482Afs(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
kable(darP_B_V1482Afs) %>%
  kable_paper() %>%
  column_spec(
    1:ncol(darP_B_V1482Afs),
    background = ifelse(darP_B_V1482Afs$isDE, "green4", "red3")
  )
range gene_id gene_name gene_biotype logFC logCPM PValue FDR dar alpha darP FDR_darP wasDE isDE
15:20403084-20405796:+ ENSDARG00000069464 cox7a1 protein_coding -2.241106 1.013476 2e-07 0.003347 0.4938229 0.7076505 1.78e-05 0.306842 TRUE FALSE
res_B$V1482Afs %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_B$V1482Afs) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_B_V1482Afs(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_B$V1482Afs) %>%
lm(shape1_norm ~ dar, data = beta_fits_B$V1482Afs) %>%
  step() %>%
  summary()
## Start:  AIC=-139.24
## shape1_norm ~ dar
## 
##        Df Sum of Sq      RSS     AIC
## <none>              0.081215 -139.24
## - dar   1   0.11258 0.193795 -119.50
## 
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_B$V1482Afs)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.11378 -0.04637  0.01259  0.03272  0.10829 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.95989    0.02233  42.987  < 2e-16 ***
## dar         -0.86046    0.15239  -5.646 9.53e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05942 on 23 degrees of freedom
## Multiple R-squared:  0.5809, Adjusted R-squared:  0.5627 
## F-statistic: 31.88 on 1 and 23 DF,  p-value: 9.525e-06
lm_B_V1482Afs_norm <- function(dar){
  1.12122 - 1.08509 * dar
}
dar_tmp <- dar_B$V1482Afs
lm_all <- lm_all %>%
  rbind(tibble(
    dataset = "B_V1482Afs",
    intercept = 1.12122,
    slope = -1.08509,
    dar_mean = mean(dar_tmp$dar_origin),
    dar_median = median(dar_tmp$dar_origin),
    dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
    dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
  ))
beta_fits_B$V1482Afs %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_B_V1482Afs_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_B_V1482Afs_norm <- res_B$V1482Afs %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_B$V1482Afs) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_B_V1482Afs_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
kable(darP_B_V1482Afs_norm) %>%
  kable_paper() %>%
  column_spec(
    1:ncol(darP_B_V1482Afs_norm),
    background = ifelse(darP_B_V1482Afs_norm$isDE, "green4", "red3")
  )
range gene_id gene_name gene_biotype logFC logCPM PValue FDR dar alpha darP FDR_darP wasDE isDE
15:20403084-20405796:+ ENSDARG00000069464 cox7a1 protein_coding -2.241106 1.013476 2e-07 0.003347 0.4938229 0.5853777 0.0001177 0.5997684 TRUE FALSE
res_B$V1482Afs %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_B$V1482Afs) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_B_V1482Afs_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

R122Pfs

plotly::ggplotly({
  plotDarECDF(dar = dar_B$R122Pfs, dar_val = "origin")
})

Before normalisation of alpha

# lm(shape1 ~ dar + I(dar^2), data = beta_fits_B$R122Pfs) %>%
lm(shape1 ~ dar, data = beta_fits_B$R122Pfs) %>%
  step() %>%
  summary()
## Start:  AIC=-104.18
## shape1 ~ dar
## 
##        Df Sum of Sq     RSS      AIC
## <none>              0.33005 -104.185
## - dar   1     1.593 1.92307  -62.124
## 
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_B$R122Pfs)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33278 -0.06104 -0.00661  0.09750  0.23034 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.01150    0.04173   48.21  < 2e-16 ***
## dar         -2.36484    0.22445  -10.54 2.83e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1198 on 23 degrees of freedom
## Multiple R-squared:  0.8284, Adjusted R-squared:  0.8209 
## F-statistic:   111 on 1 and 23 DF,  p-value: 2.827e-10
lm_B_R122Pfs <- function(dar){
  2.00307 - 2.41516 * dar
}
beta_fits_B$R122Pfs %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_B_R122Pfs(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_B_R122Pfs <- res_B$R122Pfs %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_B$R122Pfs) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_B_R122Pfs(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
kable(darP_B_R122Pfs) %>%
  kable_paper() %>%
  column_spec(
    1:ncol(darP_B_R122Pfs),
    background = ifelse(darP_B_R122Pfs$isDE, "green4", "red3")
  )
range gene_id gene_name gene_biotype logFC logCPM PValue FDR dar alpha darP FDR_darP wasDE isDE
15:14109652-14120431:+ ENSDARG00000016290 clca1 protein_coding -8.395536 4.148533 0.0e+00 0.0005694 0.000000 1.0000000 0.00e+00 0.0005694 TRUE TRUE
15:17030473-17048822:+ ENSDARG00000042332 plin2 protein_coding -2.610720 2.614403 3.2e-06 0.0278195 0.000000 1.0000000 3.20e-06 0.0278195 TRUE TRUE
15:21521803-21540276:- ENSDARG00000097580 BX663519.1 processed_transcript -3.524666 1.196269 7.9e-06 0.0455044 0.468254 0.8721617 3.55e-05 0.2042783 TRUE FALSE
res_B$R122Pfs %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_B$R122Pfs) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_B_R122Pfs(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_B$R122Pfs) %>%
lm(shape1_norm ~ dar, data = beta_fits_B$R122Pfs) %>%
  step() %>%
  summary()
## Start:  AIC=-139.98
## shape1_norm ~ dar
## 
##        Df Sum of Sq     RSS      AIC
## <none>              0.07884 -139.980
## - dar   1   0.38055 0.45939  -97.918
## 
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_B$R122Pfs)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.162649 -0.029833 -0.003233  0.047653  0.112581 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.98313    0.02039   48.21  < 2e-16 ***
## dar         -1.15583    0.10970  -10.54 2.83e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05855 on 23 degrees of freedom
## Multiple R-squared:  0.8284, Adjusted R-squared:  0.8209 
## F-statistic:   111 on 1 and 23 DF,  p-value: 2.827e-10
lm_B_R122Pfs_norm <- function(dar){
  1.12165 - 1.35240 * dar
}
dar_tmp <- dar_B$R122Pfs
lm_all <- lm_all %>%
  rbind(tibble(
    dataset = "B_R122Pfs",
    intercept = 1.12165,
    slope = -1.35240,
    dar_mean = mean(dar_tmp$dar_origin),
    dar_median = median(dar_tmp$dar_origin),
    dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
    dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
  ))
beta_fits_B$R122Pfs %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_B_R122Pfs_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_B_R122Pfs_norm <- res_B$R122Pfs %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_B$R122Pfs) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_B_R122Pfs_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
kable(darP_B_R122Pfs_norm) %>%
  kable_paper() %>%
  column_spec(
    1:ncol(darP_B_R122Pfs_norm),
    background = ifelse(darP_B_R122Pfs_norm$isDE, "green4", "red3")
  )
range gene_id gene_name gene_biotype logFC logCPM PValue FDR dar alpha darP FDR_darP wasDE isDE
15:14109652-14120431:+ ENSDARG00000016290 clca1 protein_coding -8.395536 4.148533 0.0e+00 0.0005694 0.000000 1.0000000 0.0000000 0.0005694 TRUE TRUE
15:17030473-17048822:+ ENSDARG00000042332 plin2 protein_coding -2.610720 2.614403 3.2e-06 0.0278195 0.000000 1.0000000 0.0000032 0.0278195 TRUE TRUE
15:21521803-21540276:- ENSDARG00000097580 BX663519.1 processed_transcript -3.524666 1.196269 7.9e-06 0.0455044 0.468254 0.4883833 0.0032248 0.9999337 TRUE FALSE
res_B$R122Pfs %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_B$R122Pfs) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_B_R122Pfs_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

Trans

plotly::ggplotly({
  plotDarECDF(dar = dar_B$trans, dar_val = "origin")
})

Before normalisation of alpha

# lm(shape1 ~ dar + I(dar^2), data = beta_fits_B$trans) %>%
lm(shape1 ~ dar, data = beta_fits_B$trans) %>%
  step() %>%
  summary()
## Start:  AIC=-105.46
## shape1 ~ dar
## 
##        Df Sum of Sq     RSS      AIC
## <none>              0.31365 -105.459
## - dar   1   0.60003 0.91368  -80.729
## 
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_B$trans)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.238107 -0.065328  0.004621  0.071437  0.174576 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.54215    0.04292  35.931  < 2e-16 ***
## dar         -1.81260    0.27326  -6.633  9.1e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1168 on 23 degrees of freedom
## Multiple R-squared:  0.6567, Adjusted R-squared:  0.6418 
## F-statistic:    44 on 1 and 23 DF,  p-value: 9.097e-07
lm_B_trans <- function(dar){
  1.58624 - 1.95257 * dar
}
beta_fits_B$trans %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_B_trans(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_B_trans <- res_B$trans %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_B$trans) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_B_trans(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
  • No genes were DE
res_B$trans %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_B$trans) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_B_trans(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_B$trans) %>%
lm(shape1_norm ~ dar, data = beta_fits_B$trans) %>%
  step() %>%
  summary()
## Start:  AIC=-127.15
## shape1_norm ~ dar
## 
##        Df Sum of Sq     RSS     AIC
## <none>              0.13172 -127.15
## - dar   1   0.25199 0.38371 -102.42
## 
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_B$trans)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.154303 -0.042335  0.002995  0.046294  0.113133 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.99937    0.02781  35.931  < 2e-16 ***
## dar         -1.17464    0.17708  -6.633  9.1e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07568 on 23 degrees of freedom
## Multiple R-squared:  0.6567, Adjusted R-squared:  0.6418 
## F-statistic:    44 on 1 and 23 DF,  p-value: 9.097e-07
lm_B_trans_norm <- function(dar){
  1.14804 - 1.41317 * dar
}
dar_tmp <- dar_B$trans
lm_all <- lm_all %>%
  rbind(tibble(
    dataset = "B_trans",
    intercept = 1.14804,
    slope = -1.41317,
    dar_mean = mean(dar_tmp$dar_origin),
    dar_median = median(dar_tmp$dar_origin),
    dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
    dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
  ))
beta_fits_B$trans %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_B_trans_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_B_trans_norm <- res_B$trans %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_B$trans) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_B_trans_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
  • No genes were DE
res_B$trans %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_B$trans) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_B_trans_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

Dataset C (zebrafish)

Sample structure

  • T428del/+ (\(n = 7\), psen1 gene, Alzheimer’s disease model)
  • W233fs/+ (\(n = 8\), psen1 gene, Acne Inversa disease model)
  • WT (\(n = 9\))

Load data

meta_C <- read_csv(here("data/metadata/psen1.csv")) %>%
  dplyr::select(-sample) %>%
  dplyr::rename(sample = basename, genotype = Genotype) %>%
  ## We need some sample aliases that follow R naming conventions
  mutate(
    alias = c(
      paste0(rep("fAD", 7), seq(1, 7)),
      paste0(rep("fAI", 8), seq(1, 8)),
      paste0(rep("wt", 9), seq(1, 9))
    ),
    group = genotype
  )
meta_C$genotype <- fct_relevel(
  meta_C$genotype,
  c("WT", "EOfAD-like/+", "fAI-like/+")
)
dar_C <- read_rds(here("data/dar/dar_fixed_C.Rds")) %>%
  sortSeqlevels() %>%
  endoapply(sort)
gene_dar_C <- read_rds("data/dar/gene_dar_fixed_C.Rds") %>%
  sortSeqlevels() %>%
  endoapply(sort)
dge_C <- read_rds("data/de/dgeList_psen1.Rds")
dge_C$samples <- dge_C$samples %>% 
  mutate(
    genotype = case_when(
      genotype == "fAI-like/+" ~ "fai",
      genotype == "EOfAD-like/+" ~ "eofad",
      genotype == "WT" ~ "wt"
    ),
    genotype = factor(genotype, levels = c("wt", "eofad", "fai"))
  )
dge_C <- DGEList(
  counts = dge_C$counts,
  samples = dge_C$samples,
  genes = dge_C$genes
)
dge_C <- dge_C %>%
  estimateDisp(design = model.matrix(~0 + genotype, .$sample))
colnames(dge_C$design) <- str_remove_all(colnames(dge_C$design), "^genotype")
contrasts_C <- makeContrasts(
  eofad = fai - wt,
  fai = eofad - wt,
  levels = unique(dge_C$samples$genotype)
)
## Rerun the analysis using glmQL-Fits
res_C <- lapply(colnames(contrasts_C), function(x){
  dge_C %>%
    glmQLFit() %>%
    glmQLFTest(contrast = contrasts_C[,x]) %>%
    topTags(n = Inf) %>%
    .[["table"]] %>%
    makeGRangesFromDataFrame(keep.extra.columns = TRUE) %>%
    sortSeqlevels() %>%
    sort()
}) %>%
  set_names(colnames(contrasts_C))

Exploration

lapply(names(res_C), function(x){
  res_C[[x]] %>%
    mcols() %>%
    as_tibble() %>%
    dplyr::select(
      starts_with("gene"), starts_with("log"), PValue, FDR
    ) %>%
    left_join(
      mcols(gene_dar_C[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
    ) %>%
    ggplot(aes(dar, -log10(PValue))) +
    geom_point() +
    labs(title = x)
})

  • Try breaking DAR into quantiles
lapply(names(res_C), function(x){
  res_C[[x]] %>%
    mcols() %>%
    as_tibble() %>%
    dplyr::select(
      starts_with("gene"), starts_with("log"), PValue, FDR
    ) %>%
    left_join(
      mcols(gene_dar_C[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
    ) %>%
    mutate(
      dar_q = cut(
        dar,
        breaks = quantile(dar, probs = seq(0, 1, length.out = 21), na.rm = TRUE),
        # breaks = seq(0, 1, length.out = 7),
        include.lowest = TRUE
      ) %>%
        fct_na_value_to_level(levels(.)[[1]])
    ) %>%
    ggplot(aes(PValue, after_stat(density))) +
    geom_histogram(fill = "grey70", colour = "black", binwidth = 0.01) +
    geom_line(
      ## Add 1 - dar^2 in red
      aes(y = y, colour = f),
      data = . %>%
        pull(dar_q) %>%
        levels() %>%
        sapply(
          \(x) {
            min_dar = str_replace_all(x, "^[^0-9]([0-9\\.]+),.+", "\\1") %>% as.numeric()
            tibble(
              PValue = seq(0.01, 1, by = 0.01),
              `1 - DAR` = dbeta(PValue, 1 - min_dar, 1),
              `1 - DAR^2` = dbeta(PValue, 1 - min_dar^2, 1),
              `sqrt(1 - DAR)` = dbeta(PValue, sqrt(1 - min_dar), 1),
            )
          },
          simplify = FALSE
        ) %>%
        bind_rows(.id = "dar_q") %>%
        pivot_longer(
          cols = contains("- DAR"), names_to = "f", values_to = "y"
        ) %>%
        mutate(dar_q = fct_inorder(dar_q)),
    ) +
    geom_label(
      aes(x = 1, y = 0, label = label),
      data = . %>%
        summarise(n = dplyr::n(), .by = dar_q) %>%
        mutate(label = paste("n =", comma(n))),
      hjust = 1, vjust = 0, alpha = 0.7
    ) +
    facet_wrap(~dar_q, scales = "free_y") +
    labs(title = x)
})

beta_fits_C <- sapply(names(res_C), simplify = FALSE, function(x){
  # browser()
  res_C[[x]] %>%
    as_tibble() %>%
    dplyr::select(
      range, starts_with("gene"), starts_with("log"), PValue, FDR
    ) %>%
    left_join(
      mcols(gene_dar_C[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
    ) %>%
    dplyr::filter(!is.na(dar)) %>%
    # mutate(dar_q = cut(dar, seq(0, 1, length.out = 5))) %>%
    mutate(
      dar_q = cut(
        dar,
        unique(quantile(dar, probs = seq(0, 1, length.out = 26))),
        include.lowest = TRUE
      )
    ) %>%
    split(.$dar_q) %>%
    lapply(
      \(x) {
        tryCatch({  # !!
          fit <- MASS::fitdistr(x$PValue, "beta", list(shape1 = 1, shape2 = 1))
          tibble(
            dar = median(x$dar),
            shape1 = fit$estimate["shape1"],
            se1 = fit$sd["shape1"],
            shape2 = fit$estimate["shape2"],
            se2 = fit$sd["shape2"]
          )
        }, error = \(e){
          tibble(
            dar = median(x$dar), shape1 = NA, se1 = NA, shape2 = NA, se2 = NA
          )
        })
      }
    ) %>%
    bind_rows() %>%
    mutate(shape1_norm = shape1 / max(shape1, na.rm = TRUE))
})
sapply(beta_fits_C, simplify = FALSE, function(x){
  as.data.frame(x)
})
## $eofad
##           dar    shape1        se1    shape2        se2 shape1_norm
## 1  0.00000000 1.0516492 0.03379651 1.0469804 0.03361715   0.8726949
## 2  0.01091588 1.2050593 0.05501863 1.2195521 0.05579869   1.0000000
## 3  0.01984127 1.0062750 0.04575874 0.9728413 0.04393538   0.8350419
## 4  0.02821869 1.0223467 0.04540182 1.0205202 0.04530455   0.8483787
## 5  0.03556463 0.9607481 0.04419799 1.0342756 0.04829636   0.7972620
## 6  0.04149300 1.0918347 0.04966723 1.1055569 0.05041100   0.9060423
## 7  0.04728401 0.9796968 0.04402270 1.0893014 0.04998645   0.8129864
## 8  0.05294419 0.9752321 0.04269245 1.0216589 0.04514764   0.8092814
## 9  0.05839593 1.0292494 0.04799304 1.1055744 0.05227203   0.8541068
## 10 0.06349206 0.9262727 0.04161233 0.9965386 0.04545224   0.7686532
## 11 0.06854257 0.9905245 0.04378336 1.0992170 0.04959329   0.8219716
## 12 0.07397548 0.9080890 0.04126998 1.0479619 0.04904841   0.7535637
## 13 0.07975834 0.9595551 0.04286855 1.0412356 0.04729226   0.7962721
## 14 0.08627932 0.9489121 0.04295761 1.0222127 0.04698381   0.7874402
## 15 0.09308390 1.0211514 0.04602375 1.1189878 0.05133479   0.8473869
## 16 0.10000000 0.9341859 0.04171355 0.9812842 0.04426623   0.7752199
## 17 0.10714286 0.9356994 0.04202065 1.1394177 0.05320668   0.7764758
## 18 0.11528415 0.9808316 0.04415574 1.0722643 0.04913533   0.8139281
## 19 0.12500000 0.9316319 0.04172656 1.0476917 0.04806032   0.7731005
## 20 0.13640782 0.9029964 0.03972615 1.0551631 0.04792830   0.7493377
## 21 0.15000000 0.9170364 0.04155176 1.0458633 0.04868210   0.7609886
## 22 0.16666667 0.9950321 0.04448694 1.2212485 0.05676706   0.8257121
## 23 0.19107143 0.9207534 0.04121691 1.0350035 0.04745662   0.7640731
## 24 0.24183187 0.8603681 0.03817618 1.0426690 0.04816242   0.7139632
## 
## $fai
##           dar    shape1        se1    shape2        se2 shape1_norm
## 1  0.00000000 1.1486138 0.03764128 0.9516062 0.03005250   1.0000000
## 2  0.01428571 1.0957708 0.05070408 0.9068342 0.04038256   0.9539941
## 3  0.02262586 1.1195180 0.05192515 0.9154556 0.04077944   0.9746687
## 4  0.02922403 1.1261794 0.05190010 0.9835558 0.04414200   0.9804683
## 5  0.03639991 1.1342095 0.05263045 0.9266241 0.04130143   0.9874594
## 6  0.04394514 1.0312479 0.04733179 0.9121617 0.04082096   0.8978195
## 7  0.05169753 1.0489497 0.04822769 0.9221756 0.04129802   0.9132310
## 8  0.05837083 1.0815635 0.04958688 0.9870004 0.04443917   0.9416250
## 9  0.06548263 0.8891103 0.04019420 0.8805064 0.03972118   0.7740724
## 10 0.07232824 1.0177335 0.04662505 0.9141083 0.04095848   0.8860537
## 11 0.07893805 0.9467241 0.04316280 0.8771434 0.03934318   0.8242319
## 12 0.08541667 1.0239182 0.04659356 0.9971198 0.04513316   0.8914381
## 13 0.09189714 0.9965465 0.04542865 0.9448180 0.04260047   0.8676080
## 14 0.09925832 0.9790886 0.04452669 0.9401230 0.04239619   0.8524089
## 15 0.10698578 0.9750491 0.04433896 0.9102661 0.04080082   0.8488920
## 16 0.11548764 0.9898035 0.04513674 0.9585039 0.04342190   0.8617374
## 17 0.12447222 0.9363662 0.04257554 0.8895555 0.04000652   0.8152141
## 18 0.13414386 0.9813786 0.04408310 0.9232305 0.04094681   0.8544026
## 19 0.14372777 1.0465242 0.04866822 0.9532246 0.04349996   0.9111193
## 20 0.15502476 0.9670394 0.04378116 0.9740016 0.04416152   0.8419186
## 21 0.17061408 0.9559307 0.04330687 0.9564211 0.04333370   0.8322473
## 22 0.19311869 0.9562792 0.04336182 0.9399217 0.04246684   0.8325507
## 23 0.22261241 1.0061136 0.04588987 0.9518935 0.04292702   0.8759373
## 24 0.31047012 1.0423455 0.04777231 0.9406446 0.04222044   0.9074813

EOfAD

plotly::ggplotly({
  plotDarECDF(dar = dar_C$eofad, dar_val = "origin")
})

Before normalisation of alpha

lm(shape1 ~ dar + I(dar^2), data = beta_fits_C$eofad) %>%
  # lm(shape1 ~ dar, data = beta_fits_C$eofad) %>%
  step() %>%
  summary()
## Start:  AIC=-137.41
## shape1 ~ dar + I(dar^2)
## 
##            Df Sum of Sq      RSS     AIC
## <none>                  0.060981 -137.41
## - I(dar^2)  1 0.0058392 0.066820 -137.21
## - dar       1 0.0210234 0.082004 -132.30
## 
## Call:
## lm(formula = shape1 ~ dar + I(dar^2), data = beta_fits_C$eofad)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.07043 -0.02797 -0.01275  0.02135  0.14485 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.07728    0.02901  37.133   <2e-16 ***
## dar         -1.60333    0.59588  -2.691   0.0137 *  
## I(dar^2)     3.62761    2.55818   1.418   0.1708    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05389 on 21 degrees of freedom
## Multiple R-squared:  0.4876, Adjusted R-squared:  0.4388 
## F-statistic: 9.992 on 2 and 21 DF,  p-value: 0.0008932
lm_C_eofad <- function(dar){
  1.03004 - 0.77544 * dar
}
beta_fits_C$eofad %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_C_eofad(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_C_eofad <- res_C$eofad %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_C$eofad) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_C_eofad(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
kable(darP_C_eofad) %>%
  kable_paper() %>%
  column_spec(
    1:ncol(darP_C_eofad),
    background = ifelse(darP_C_eofad$isDE, "green4", "red3")
  )
range gene_id gene_name gene_biotype logFC logCPM PValue FDR dar alpha darP FDR_darP wasDE isDE
17:51206225-51224800:- ENSDARG00000004870 psen1 protein_coding -0.7621412 5.3665843 0.0e+00 0.0000000 0.3305692 0.7737035 0.00e+00 0.0000000 TRUE TRUE
17:38738840-38778901:- ENSDARG00000005179 dglucy protein_coding -0.9308747 5.0312423 3.9e-06 0.0425888 0.0380146 1.0000000 3.90e-06 0.0425888 TRUE TRUE
17:5611166-5622440:+ ENSDARG00000067665 fam167aa protein_coding -4.7628058 0.2296959 6.0e-06 0.0435244 0.1012693 0.9515118 1.07e-05 0.0779892 TRUE FALSE
res_C$eofad %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_C$eofad) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_C_eofad(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_C$eofad) %>%
  # lm(shape1_norm ~ dar, data = beta_fits_C$eofad) %>%
  step() %>%
  summary()
## Start:  AIC=-146.36
## shape1_norm ~ dar + I(dar^2)
## 
##            Df Sum of Sq      RSS     AIC
## <none>                  0.041993 -146.36
## - I(dar^2)  1  0.004021 0.046014 -146.16
## - dar       1  0.014477 0.056470 -141.25
## 
## Call:
## lm(formula = shape1_norm ~ dar + I(dar^2), data = beta_fits_C$eofad)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.05845 -0.02321 -0.01058  0.01772  0.12020 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.89396    0.02407  37.133   <2e-16 ***
## dar         -1.33050    0.49448  -2.691   0.0137 *  
## I(dar^2)     3.01032    2.12286   1.418   0.1708    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04472 on 21 degrees of freedom
## Multiple R-squared:  0.4876, Adjusted R-squared:  0.4388 
## F-statistic: 9.992 on 2 and 21 DF,  p-value: 0.0008932
lm_C_eofad_norm <- function(dar){
  1.08284 - 0.81518 * dar
}
dar_tmp <- dar_C$eofad
lm_all <- lm_all %>%
  rbind(tibble(
    dataset = "C_eofad",
    intercept = 1.08284,
    slope = -0.81518,
    dar_mean = mean(dar_tmp$dar_origin),
    dar_median = median(dar_tmp$dar_origin),
    dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
    dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
  ))
beta_fits_C$eofad %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_C_eofad_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_C_eofad_norm <- res_C$eofad %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_C$eofad) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_C_eofad_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
kable(darP_C_eofad_norm) %>%
  kable_paper() %>%
  column_spec(
    1:ncol(darP_C_eofad_norm),
    background = ifelse(darP_C_eofad_norm$isDE, "green4", "red3")
  )
range gene_id gene_name gene_biotype logFC logCPM PValue FDR dar alpha darP FDR_darP wasDE isDE
17:51206225-51224800:- ENSDARG00000004870 psen1 protein_coding -0.7621412 5.3665843 0.0e+00 0.0000000 0.3305692 0.8133666 0.0e+00 0.0000000 TRUE TRUE
17:38738840-38778901:- ENSDARG00000005179 dglucy protein_coding -0.9308747 5.0312423 3.9e-06 0.0425888 0.0380146 1.0000000 3.9e-06 0.0425888 TRUE TRUE
17:5611166-5622440:+ ENSDARG00000067665 fam167aa protein_coding -4.7628058 0.2296959 6.0e-06 0.0435244 0.1012693 1.0000000 6.0e-06 0.0435244 TRUE TRUE
res_C$eofad %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_C$eofad) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_C_eofad_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

fAI

plotly::ggplotly({
  plotDarECDF(dar = dar_C$fai, dar_val = "origin")
})

Before normalisation of alpha

# lm(shape1 ~ dar + I(dar^2), data = beta_fits_C$fai) %>%
lm(shape1 ~ dar, data = beta_fits_C$fai) %>%
  step() %>%
  summary()
## Start:  AIC=-131.06
## shape1 ~ dar
## 
##        Df Sum of Sq      RSS     AIC
## <none>              0.086348 -131.06
## - dar   1  0.023431 0.109779 -127.30
## 
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_C$fai)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.14715 -0.03109 -0.01499  0.04277  0.11335 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.06493    0.02218  48.021   <2e-16 ***
## dar         -0.43783    0.17920  -2.443    0.023 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06265 on 22 degrees of freedom
## Multiple R-squared:  0.2134, Adjusted R-squared:  0.1777 
## F-statistic:  5.97 on 1 and 22 DF,  p-value: 0.02304
lm_C_fai <- function(dar){
  1.27179 - 1.70850 * dar
}
beta_fits_C$fai %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_C_fai(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_C_fai <- res_C$fai %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_C$fai) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_C_fai(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
kable(darP_C_fai) %>%
  kable_paper() %>%
  column_spec(
    1:ncol(darP_C_fai),
    background = ifelse(darP_C_fai$isDE, "green4", "red3")
  )
range gene_id gene_name gene_biotype logFC logCPM PValue FDR dar alpha darP FDR_darP wasDE isDE
17:49785495-49978986:- ENSDARG00000078322 col12a1a protein_coding 1.9045922 3.854349 0.0e+00 0.0000195 0.2215742 0.8932305 0.00e+00 0.0001801 TRUE TRUE
17:50413084-50422584:- ENSDARG00000093378 si:ch211-235i11.5 protein_coding 0.6341595 3.057743 1.0e-07 0.0007415 0.0677979 1.0000000 1.00e-07 0.0007415 TRUE TRUE
7:32901658-32924520:+ ENSDARG00000079324 ano9b protein_coding 1.4319607 -1.240503 1.5e-06 0.0100260 0.0669498 1.0000000 1.50e-06 0.0106873 TRUE TRUE
17:51224421-51229737:+ ENSDARG00000020448 adi1 protein_coding 0.6019222 2.881561 1.8e-06 0.0100260 0.3331819 0.7025487 9.33e-05 0.2268036 TRUE FALSE
res_C$fai %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_C$fai) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_C_fai(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_C$fai) %>%
lm(shape1_norm ~ dar, data = beta_fits_C$fai) %>%
  step() %>%
  summary()
## Start:  AIC=-137.71
## shape1_norm ~ dar
## 
##        Df Sum of Sq      RSS     AIC
## <none>              0.065449 -137.71
## - dar   1   0.01776 0.083209 -133.95
## 
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_C$fai)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.12811 -0.02707 -0.01305  0.03724  0.09868 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.92715    0.01931  48.021   <2e-16 ***
## dar         -0.38118    0.15601  -2.443    0.023 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05454 on 22 degrees of freedom
## Multiple R-squared:  0.2134, Adjusted R-squared:  0.1777 
## F-statistic:  5.97 on 1 and 22 DF,  p-value: 0.02304
lm_C_fai_norm <- function(dar){
  1.1434 - 1.5360 * dar
}
dar_tmp <- dar_C$fai
lm_all <- lm_all %>%
  rbind(tibble(
    dataset = "C_fai",
    intercept = 1.1434,
    slope = -1.5360,
    dar_mean = mean(dar_tmp$dar_origin),
    dar_median = median(dar_tmp$dar_origin),
    dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
    dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
  ))
beta_fits_C$fai %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_C_fai_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_C_fai_norm <- res_C$fai %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_C$fai) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_C_fai_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
kable(darP_C_fai_norm) %>%
  kable_paper() %>%
  column_spec(
    1:ncol(darP_C_fai_norm),
    background = ifelse(darP_C_fai_norm$isDE, "green4", "red3")
  )
range gene_id gene_name gene_biotype logFC logCPM PValue FDR dar alpha darP FDR_darP wasDE isDE
17:49785495-49978986:- ENSDARG00000078322 col12a1a protein_coding 1.9045922 3.854349 0.0e+00 0.0000195 0.2215742 0.8030620 1.00e-07 0.0007415 TRUE TRUE
17:50413084-50422584:- ENSDARG00000093378 si:ch211-235i11.5 protein_coding 0.6341595 3.057743 1.0e-07 0.0007415 0.0677979 1.0000000 1.00e-07 0.0007415 TRUE TRUE
7:32901658-32924520:+ ENSDARG00000079324 ano9b protein_coding 1.4319607 -1.240503 1.5e-06 0.0100260 0.0669498 1.0000000 1.50e-06 0.0106873 TRUE TRUE
17:51224421-51229737:+ ENSDARG00000020448 adi1 protein_coding 0.6019222 2.881561 1.8e-06 0.0100260 0.3331819 0.6316326 2.38e-04 0.4003521 TRUE FALSE
res_C$fai %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_C$fai) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_C_fai_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

Dataset D (mouse)

Sample structure

  • APOE2/APOE2 (\(n = 8\) female, \(n = 8\) male, targeted replacement of human APOE $$2 allele, Alzheimer’s disease protective)
  • APOE3/APOE3 (\(n = 8\) female, \(n = 8\) male, targeted replacement of human APOE $$3 allele, Alzheimer’s disease neutral)
  • APOE4/APOE4 (\(n = 8\) female, \(n = 8\) male, targeted replacement of human APOE $$4 allele, Alzheimer’s disease risk)

Load data

meta_D <- read_tsv(here("data/metadata/apoe_synapse.tsv")) %>%
  left_join(read_csv(here("data/metadata/apoe.csv"))) %>%
  dplyr::select(
    sample = specimenID, species, genotypeBackground, litter, dateBirth,
    dateDeath, genotype = Genotype, sex = Sex, age = Age, lane, basename = name,
    modelSystemName, individualID, study
  ) %>%
  dplyr::filter(str_detect(sample, "_3M_")) %>%
  mutate(basename = str_remove(basename, ".bam_R(1|2).fastq.gz")) %>%
  distinct(sample, .keep_all = TRUE) %>%
  mutate(
    group = as.factor(paste0(genotype, "_", age, "_", sex)),
    genotype = as.factor(genotype)
  ) %>%
  dplyr::arrange(genotype, group)
dar_D <- read_rds(here("data/dar/dar_loci_D.Rds")) %>%  # !!
  sortSeqlevels() %>%
  endoapply(sort)
gene_dar_D <- read_rds("data/dar/gene_dar_loci_D.Rds") %>%  # !!
  sortSeqlevels() %>%
  endoapply(sort)
dge_D <- read_rds("data/de/dgeList_apoe.Rds")
dge_D <- DGEList(
  counts = dge_D$counts,
  samples = dge_D$samples,
  genes = dge_D$genes
)
dge_D <- dge_D %>%
  estimateDisp(design = model.matrix(~0 + group, .$sample))
colnames(dge_D$design) <- str_remove_all(colnames(dge_D$design), "^group")
contrasts_D <- makeContrasts(
  apoe2v3_female = APOE2_3M_female - APOE3_3M_female,
  apoe2v3_male = APOE2_3M_male - APOE3_3M_male,
  apoe4v3_female = APOE4_3M_female - APOE3_3M_female,
  apoe4v3_male = APOE4_3M_male - APOE3_3M_male,
  levels = dge_D$samples$group
)
## Rerun the analysis using glmQL-Fits
res_D <- lapply(colnames(contrasts_D), function(x){
  dge_D %>%
    glmQLFit() %>%
    glmQLFTest(contrast = contrasts_D[,x]) %>%
    topTags(n = Inf) %>%
    .[["table"]] %>%
    makeGRangesFromDataFrame(keep.extra.columns = TRUE) %>%
    sortSeqlevels() %>%
    sort()
}) %>%
  set_names(colnames(contrasts_D))

Exploration

lapply(names(res_D), function(x){
  res_D[[x]] %>%
    mcols() %>%
    as_tibble() %>%
    dplyr::select(
      starts_with("gene"), starts_with("log"), PValue, FDR
    ) %>%
    left_join(
      mcols(gene_dar_D[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
    ) %>%
    ggplot(aes(dar, -log10(PValue))) +
    geom_point() +
    labs(title = x)
})

lapply(names(res_D), function(x){
  res_D[[x]] %>%
    mcols() %>%
    as_tibble() %>%
    dplyr::select(
      starts_with("gene"), starts_with("log"), PValue, FDR
    ) %>%
    left_join(
      mcols(gene_dar_D[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
    ) %>%
    mutate(
      dar_q = cut(
        dar,
        breaks = quantile(dar, probs = seq(0, 1, length.out = 21), na.rm = TRUE),
        # breaks = seq(0, 1, length.out = 7),
        include.lowest = TRUE
      ) %>%
        fct_na_value_to_level(levels(.)[[1]])
    ) %>%
    ggplot(aes(PValue, after_stat(density))) +
    geom_histogram(fill = "grey70", colour = "black", binwidth = 0.01) +
    geom_line(
      ## Add 1 - dar^2 in red
      aes(y = y, colour = f),
      data = . %>%
        pull(dar_q) %>%
        levels() %>%
        sapply(
          \(x) {
            min_dar = str_replace_all(x, "^[^0-9]([0-9\\.]+),.+", "\\1") %>% as.numeric()
            tibble(
              PValue = seq(0.01, 1, by = 0.01),
              `1 - DAR` = dbeta(PValue, 1 - min_dar, 1),
              `1 - DAR^2` = dbeta(PValue, 1 - min_dar^2, 1),
              `sqrt(1 - DAR)` = dbeta(PValue, sqrt(1 - min_dar), 1),
            )
          },
          simplify = FALSE
        ) %>%
        bind_rows(.id = "dar_q") %>%
        pivot_longer(
          cols = contains("- DAR"), names_to = "f", values_to = "y"
        ) %>%
        mutate(dar_q = fct_inorder(dar_q)),
    ) +
    geom_label(
      aes(x = 1, y = 0, label = label),
      data = . %>%
        summarise(n = dplyr::n(), .by = dar_q) %>%
        mutate(label = paste("n =", comma(n))),
      hjust = 1, vjust = 0, alpha = 0.7
    ) +
    facet_wrap(~dar_q, scales = "free_y") +
    labs(title = x)
})

beta_fits_D <- sapply(names(res_D), simplify = FALSE, function(x){
  res_D[[x]] %>%
    as_tibble() %>%
    dplyr::select(
      range, starts_with("gene"), starts_with("log"), PValue, FDR
    ) %>%
    left_join(
      mcols(gene_dar_D[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
    ) %>%
    dplyr::filter(!is.na(dar)) %>%
    # mutate(dar_q = cut(dar, seq(0, 1, length.out = 5))) %>%
    mutate(
      dar_q = cut(
        dar,
        unique(quantile(dar, probs = seq(0, 1, length.out = 26))),
        include.lowest = TRUE
      )
    ) %>%
    split(.$dar_q) %>%
    lapply(
      \(x) {
        tryCatch({  # !!
          fit <- MASS::fitdistr(x$PValue, "beta", list(shape1 = 1, shape2 = 1))
          tibble(
            dar = median(x$dar),
            shape1 = fit$estimate["shape1"],
            se1 = fit$sd["shape1"],
            shape2 = fit$estimate["shape2"],
            se2 = fit$sd["shape2"]
          )
        }, error = \(e){
          tibble(
            dar = median(x$dar), shape1 = NA, se1 = NA, shape2 = NA, se2 = NA
          )
        })
      }
    ) %>%
    bind_rows() %>%
    mutate(shape1_norm = shape1 / max(shape1, na.rm = TRUE))
})
sapply(beta_fits_D, simplify = FALSE, function(x){
  as.data.frame(x)
})
## $apoe2v3_female
##            dar    shape1        se1    shape2        se2 shape1_norm
## 1  0.005895809 0.4408589 0.02192272 0.9516502 0.05768450   0.9264683
## 2  0.017029221 0.4443325 0.02205732 1.0054818 0.06143302   0.9337680
## 3  0.021807359 0.4102111 0.02021430 1.0301564 0.06439452   0.8620618
## 4  0.024431818 0.4240079 0.02093294 1.0311654 0.06392025   0.8910559
## 5  0.027083333 0.4166771 0.02057051 0.9868581 0.06092134   0.8756501
## 6  0.029788961 0.4243520 0.02096748 0.9233403 0.05588815   0.8917790
## 7  0.031982241 0.4588475 0.02283998 1.0678088 0.06561263   0.9642714
## 8  0.034334416 0.4167097 0.02043282 0.8990105 0.05397516   0.8757185
## 9  0.036147186 0.4185472 0.02071815 0.9779498 0.06032709   0.8795800
## 10 0.037813853 0.4612477 0.02303983 1.0836409 0.06691689   0.9693155
## 11 0.039393939 0.4534533 0.02268309 1.0239855 0.06289999   0.9529354
## 12 0.041071429 0.4758489 0.02369323 1.0545870 0.06394338   1.0000000
## 13 0.042576741 0.3908935 0.01920248 1.0435953 0.06623182   0.8214655
## 14 0.044372294 0.4121025 0.02014629 1.0236513 0.06330023   0.8660365
## 15 0.046428571 0.4536583 0.02261145 1.1162569 0.06955509   0.9533664
## 16 0.048674242 0.4548271 0.02269120 1.0748324 0.06646314   0.9558226
## 17 0.051055195 0.4109288 0.02031701 0.9245324 0.05657469   0.8635699
## 18 0.054215368 0.4557003 0.02266379 1.0484602 0.06425632   0.9576577
## 19 0.058028335 0.4390151 0.02175801 1.0313853 0.06349385   0.9225934
## 20 0.063950216 0.3919633 0.01916247 0.9207390 0.05649594   0.8237139
## 21 0.070596398 0.4321311 0.02151665 1.0258984 0.06368911   0.9081267
## 22 0.080438312 0.4336465 0.02155631 0.9389897 0.05701425   0.9113113
## 23 0.092478355 0.4368029 0.02153816 1.0792315 0.06684404   0.9179446
## 24 0.113849432 0.4141398 0.02048003 0.9733418 0.06012076   0.8703179
## 25 0.207954545 0.2453864 0.01161003 0.7515342 0.04971101   0.5156814
## 
## $apoe2v3_male
##           dar    shape1        se1    shape2        se2 shape1_norm
## 1  0.01003542 0.7070445 0.03749538 0.8796560 0.04907089   0.8913257
## 2  0.01837121 0.6895401 0.03628600 0.9233918 0.05197575   0.8692591
## 3  0.02253788 0.7308151 0.03853632 0.9363788 0.05222089   0.9212919
## 4  0.02605519 0.6647743 0.03489683 0.8545684 0.04761281   0.8380384
## 5  0.02905844 0.6354534 0.03303564 0.9006440 0.05085790   0.8010754
## 6  0.03149351 0.6821956 0.03659039 0.8600777 0.04871496   0.8600003
## 7  0.03402056 0.7729462 0.04106069 0.9667523 0.05395412   0.9744038
## 8  0.03608323 0.7440700 0.03950806 0.9927646 0.05618936   0.9380015
## 9  0.03790584 0.7177556 0.03727992 0.9142557 0.05018615   0.9048286
## 10 0.03988095 0.6517843 0.03491851 0.8784730 0.05051026   0.8216627
## 11 0.04199134 0.7932504 0.04215423 0.9945657 0.05551381   1.0000000
## 12 0.04376623 0.6537008 0.03436996 0.9036548 0.05126982   0.8240788
## 13 0.04529221 0.7431380 0.03957037 0.9071147 0.05053620   0.9368266
## 14 0.04784091 0.7140229 0.03785418 0.8876071 0.04947811   0.9001229
## 15 0.04980700 0.6776041 0.03561545 0.9329660 0.05279801   0.8542121
## 16 0.05224116 0.6989092 0.03644656 0.9396902 0.05243465   0.8810701
## 17 0.05492424 0.6431253 0.03387721 0.9468421 0.05459116   0.8107469
## 18 0.05743661 0.7614044 0.04024522 0.9393213 0.05203211   0.9598538
## 19 0.06087662 0.7219788 0.03846509 0.8982060 0.05031037   0.9101525
## 20 0.06525974 0.7222349 0.03843846 0.9112787 0.05114400   0.9104754
## 21 0.07305195 0.6909656 0.03615089 1.0105339 0.05759775   0.8710562
## 22 0.08363456 0.6419801 0.03348786 0.9436218 0.05386195   0.8093032
## 23 0.10108225 0.6680077 0.03498514 0.9454049 0.05365310   0.8421146
## 24 0.12421537 0.6178671 0.03234848 0.8173550 0.04581641   0.7789055
## 25 0.22518939 0.2981008 0.01452833 0.6412545 0.03914084   0.3757966
## 
## $apoe4v3_female
##           dar    shape1        se1    shape2        se2 shape1_norm
## 1  0.01715909 0.6935597 0.03666605 0.8571220 0.04761836   0.8802177
## 2  0.02289945 0.6383718 0.03347879 0.8862999 0.05025071   0.8101770
## 3  0.02678571 0.7220083 0.03813736 0.9630147 0.05427617   0.9163226
## 4  0.02954545 0.6777475 0.03533948 0.9124303 0.05097841   0.8601500
## 5  0.03227814 0.6656931 0.03526700 0.9046623 0.05149172   0.8448514
## 6  0.03422619 0.6440026 0.03384201 0.8453954 0.04741893   0.8173233
## 7  0.03613243 0.6135572 0.03195911 0.8538744 0.04818206   0.7786841
## 8  0.03802083 0.6375517 0.03319393 0.9332391 0.05313825   0.8091363
## 9  0.04024351 0.6698611 0.03523461 0.8829097 0.04955267   0.8501411
## 10 0.04219697 0.7272793 0.03862612 0.9375872 0.05274120   0.9230123
## 11 0.04431818 0.7381502 0.03928304 0.9138834 0.05104884   0.9368088
## 12 0.04638528 0.6829490 0.03606691 0.8561623 0.04768513   0.8667514
## 13 0.04853896 0.6821394 0.03595566 0.8964459 0.05035559   0.8657238
## 14 0.05096419 0.6740780 0.03535393 0.9178848 0.05172020   0.8554929
## 15 0.05413510 0.6980756 0.03678142 0.9389514 0.05294476   0.8859490
## 16 0.05833333 0.7563666 0.04044936 0.9247554 0.05173802   0.9599278
## 17 0.06185065 0.7879411 0.04198970 0.9803848 0.05480031   1.0000000
## 18 0.06780303 0.7341560 0.03852348 1.0136367 0.05712156   0.9317397
## 19 0.07391324 0.7249206 0.03844740 0.9877847 0.05613905   0.9200188
## 20 0.08255411 0.7514358 0.03986454 0.9724617 0.05463010   0.9536700
## 21 0.09359217 0.7434977 0.03873686 1.0146336 0.05661032   0.9435955
## 22 0.10587121 0.5928451 0.03132055 0.8146662 0.04654984   0.7523977
## 23 0.11870130 0.7032247 0.03726117 0.9020024 0.05061759   0.8924838
## 24 0.14347944 0.6549423 0.03411317 0.8927690 0.05002193   0.8312071
## 25 0.57206633 0.3111342 0.01534392 0.6262067 0.03789130   0.3948699
## 
## $apoe4v3_male
##           dar    shape1         se1    shape2        se2 shape1_norm
## 1  0.01420455 0.3725740 0.018275029 0.8675891 0.05353546   0.9530350
## 2  0.02229437 0.3567168 0.017389213 0.8605068 0.05341421   0.9124728
## 3  0.02517587 0.3683323 0.018032883 0.8628615 0.05327017   0.9421850
## 4  0.02825758 0.3669960 0.017953374 0.8542788 0.05263468   0.9387666
## 5  0.03130780 0.3847493 0.018782922 0.8766585 0.05341932   0.9841791
## 6  0.03375000 0.3389318 0.016626008 0.8544133 0.05418042   0.8669791
## 7  0.03615260 0.3276340 0.015903688 0.8498837 0.05386473   0.8380797
## 8  0.03827922 0.3509945 0.017072133 0.8236148 0.05073704   0.8978352
## 9  0.04050325 0.3816951 0.018713943 0.9638332 0.06044496   0.9763666
## 10 0.04236472 0.3314705 0.015985673 0.8803795 0.05569667   0.8478934
## 11 0.04494048 0.3545042 0.017322167 0.8689680 0.05430426   0.9068130
## 12 0.04696970 0.3447502 0.016812664 0.8492054 0.05315652   0.8818624
## 13 0.04903950 0.3460643 0.016789326 0.8881060 0.05583386   0.8852238
## 14 0.05159091 0.3872454 0.019083628 0.9266671 0.05762254   0.9905641
## 15 0.05473731 0.3823639 0.018690182 1.0193857 0.06450392   0.9780773
## 16 0.05812771 0.3779848 0.018527386 0.8623896 0.05285629   0.9668758
## 17 0.06224026 0.3409698 0.016633905 0.8086566 0.05021197   0.8721923
## 18 0.06680646 0.3564419 0.017489721 0.8094519 0.04983988   0.9117695
## 19 0.07306818 0.3832031 0.018857892 0.8659887 0.05310794   0.9802241
## 20 0.08106061 0.3358194 0.016315572 0.8342687 0.05225068   0.8590176
## 21 0.09227420 0.3649044 0.017465796 0.9476836 0.05864959   0.9334164
## 22 0.10597944 0.3388895 0.016655153 0.9247661 0.05987964   0.8668709
## 23 0.12145563 0.3341856 0.016244916 0.8464944 0.05332085   0.8548384
## 24 0.15549242 0.3909342 0.019239594 0.9050027 0.05572511   1.0000000
## 25 0.62691739 0.1988180 0.009277477 0.6740541 0.04604119   0.5085714

APOE2v3 female

plotly::ggplotly({
  plotDarECDF(dar = dar_D$apoe2v3_female, dar_val = "origin")
})

Before normalisation of alpha

# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$apoe2v3_female) %>%
lm(shape1 ~ dar, data = beta_fits_D$apoe2v3_female) %>%
  step() %>%
  summary()
## Start:  AIC=-172.9
## shape1 ~ dar
## 
##        Df Sum of Sq      RSS    AIC
## <none>              0.021128 -172.9
## - dar   1  0.023862 0.044990 -156.0
## 
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_D$apoe2v3_female)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.057750 -0.022453 -0.008388  0.024811  0.043194 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.46606    0.01014  45.976  < 2e-16 ***
## dar         -0.78347    0.15372  -5.097 3.67e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03031 on 23 degrees of freedom
## Multiple R-squared:  0.5304, Adjusted R-squared:   0.51 
## F-statistic: 25.98 on 1 and 23 DF,  p-value: 3.671e-05
lm_D_apoe2v3_female <- function(dar){
  0.52339 - 1.07314 * dar
}
beta_fits_D$apoe2v3_female %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_D_apoe2v3_female(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_D_apoe2v3_female <- res_D$apoe2v3_female %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_D$apoe2v3_female) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_D_apoe2v3_female(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_D_apoe2v3_female$isDE)
## 
## FALSE  TRUE 
##  1470     2
res_D$apoe2v3_female %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_D$apoe2v3_female) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_D_apoe2v3_female(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_D$apoe2v3_female) %>%
lm(shape1_norm ~ dar, data = beta_fits_D$apoe2v3_female) %>%
  step() %>%
  summary()
## Start:  AIC=-135.77
## shape1_norm ~ dar
## 
##        Df Sum of Sq      RSS     AIC
## <none>              0.093307 -135.77
## - dar   1   0.10538 0.198689 -118.87
## 
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_D$apoe2v3_female)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.12136 -0.04718 -0.01763  0.05214  0.09077 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.9794     0.0213  45.976  < 2e-16 ***
## dar          -1.6465     0.3231  -5.097 3.67e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06369 on 23 degrees of freedom
## Multiple R-squared:  0.5304, Adjusted R-squared:   0.51 
## F-statistic: 25.98 on 1 and 23 DF,  p-value: 3.671e-05
lm_D_apoe2v3_female_norm <- function(dar){
  1.10382 - 2.26321 * dar
}
dar_tmp <- dar_D$apoe2v3_female
lm_all <- lm_all %>%
  rbind(tibble(
    dataset = "D_apoe2v3_female",
    intercept = 1.10382,
    slope = -2.26321,
    dar_mean = mean(dar_tmp$dar_origin),
    dar_median = median(dar_tmp$dar_origin),
    dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
    dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
  ))
beta_fits_D$apoe2v3_female %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_D_apoe2v3_female_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_D_apoe2v3_female_norm <- res_D$apoe2v3_female %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_D$apoe2v3_female) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_D_apoe2v3_female_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_D_apoe2v3_female_norm$isDE)
## 
## FALSE  TRUE 
##   404  1068
res_D$apoe2v3_female %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_D$apoe2v3_female) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_D_apoe2v3_female_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

APOE2v3 male

plotly::ggplotly({
  plotDarECDF(dar = dar_D$apoe2v3_male, dar_val = "origin")
})

Before normalisation of alpha

# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$apoe2v3_male) %>%
lm(shape1 ~ dar, data = beta_fits_D$apoe2v3_male) %>%
  step() %>%
  summary()
## Start:  AIC=-142.35
## shape1 ~ dar
## 
##        Df Sum of Sq      RSS     AIC
## <none>              0.071713 -142.35
## - dar   1   0.13078 0.202492 -118.40
## 
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_D$apoe2v3_male)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.097239 -0.049590  0.006362  0.047731  0.086947 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.77758    0.01854  41.933  < 2e-16 ***
## dar         -1.69742    0.26209  -6.476 1.31e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05584 on 23 degrees of freedom
## Multiple R-squared:  0.6459, Adjusted R-squared:  0.6305 
## F-statistic: 41.94 on 1 and 23 DF,  p-value: 1.312e-06
lm_D_apoe2v3_male <- function(dar){
  0.65566 - 1.46188 * dar
}
beta_fits_D$apoe2v3_male %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_D_apoe2v3_male(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_D_apoe2v3_male <- res_D$apoe2v3_male %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_D$apoe2v3_male) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_D_apoe2v3_male(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_D_apoe2v3_male$isDE)
## 
## FALSE  TRUE 
##   128     5
res_D$apoe2v3_male %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_D$apoe2v3_male) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_D_apoe2v3_male(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_D$apoe2v3_male) %>%
lm(shape1_norm ~ dar, data = beta_fits_D$apoe2v3_male) %>%
  step() %>%
  summary()
## Start:  AIC=-130.77
## shape1_norm ~ dar
## 
##        Df Sum of Sq     RSS     AIC
## <none>              0.11397 -130.77
## - dar   1   0.20784 0.32180 -106.82
## 
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_D$apoe2v3_male)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.122583 -0.062515  0.008021  0.060172  0.109608 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.98025    0.02338  41.933  < 2e-16 ***
## dar         -2.13983    0.33040  -6.476 1.31e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07039 on 23 degrees of freedom
## Multiple R-squared:  0.6459, Adjusted R-squared:  0.6305 
## F-statistic: 41.94 on 1 and 23 DF,  p-value: 1.312e-06
lm_D_apoe2v3_male_norm <- function(dar){
  1.12123 - 2.49993 * dar
}
dar_tmp <- dar_D$apoe2v3_male
lm_all <- lm_all %>%
  rbind(tibble(
    dataset = "D_apoe2v3_male",
    intercept = 1.12123,
    slope = -2.49993,
    dar_mean = mean(dar_tmp$dar_origin),
    dar_median = median(dar_tmp$dar_origin),
    dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
    dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
  ))
beta_fits_D$apoe2v3_male %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_D_apoe2v3_male_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_D_apoe2v3_male_norm <- res_D$apoe2v3_male %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_D$apoe2v3_male) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_D_apoe2v3_male_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_D_apoe2v3_male_norm$isDE)
## 
## FALSE  TRUE 
##   102    31
res_D$apoe2v3_male %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_D$apoe2v3_male) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_D_apoe2v3_male_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

APOE4v3 female

plotly::ggplotly({
  plotDarECDF(dar = dar_D$apoe4v3_female, dar_val = "origin")
})

Before normalisation of alpha

# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$apoe4v3_female) %>%
lm(shape1 ~ dar, data = beta_fits_D$apoe4v3_female) %>%
  step() %>%
  summary()
## Start:  AIC=-143.06
## shape1 ~ dar
## 
##        Df Sum of Sq      RSS     AIC
## <none>              0.069701 -143.06
## - dar   1   0.12397 0.193670 -119.51
## 
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_D$apoe4v3_female)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.09121 -0.03570 -0.01434  0.04539  0.10035 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.72890    0.01368  53.300  < 2e-16 ***
## dar         -0.66787    0.10442  -6.396 1.58e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05505 on 23 degrees of freedom
## Multiple R-squared:  0.6401, Adjusted R-squared:  0.6245 
## F-statistic: 40.91 on 1 and 23 DF,  p-value: 1.585e-06
lm_D_apoe4v3_female <- function(dar){
  0.64361 - 0.59121 * dar
}
beta_fits_D$apoe4v3_female %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_D_apoe4v3_female(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_D_apoe4v3_female <- res_D$apoe4v3_female %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_D$apoe4v3_female) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_D_apoe4v3_female(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_D_apoe4v3_female$isDE)
## 
## FALSE  TRUE 
##   171    18
res_D$apoe4v3_female %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_D$apoe4v3_female) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_D_apoe4v3_female(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_D$apoe4v3_female) %>%
lm(shape1_norm ~ dar, data = beta_fits_D$apoe4v3_female) %>%
  step() %>%
  summary()
## Start:  AIC=-131.14
## shape1_norm ~ dar
## 
##        Df Sum of Sq     RSS     AIC
## <none>              0.11227 -131.14
## - dar   1   0.19967 0.31194 -107.59
## 
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_D$apoe4v3_female)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.11576 -0.04531 -0.01820  0.05760  0.12736 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.92507    0.01736  53.300  < 2e-16 ***
## dar         -0.84761    0.13252  -6.396 1.58e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06987 on 23 degrees of freedom
## Multiple R-squared:  0.6401, Adjusted R-squared:  0.6245 
## F-statistic: 40.91 on 1 and 23 DF,  p-value: 1.585e-06
lm_D_apoe4v3_female_norm <- function(dar){
  1.06010 - 0.97379 * dar
}
dar_tmp <- dar_D$apoe4v3_female
lm_all <- lm_all %>%
  rbind(tibble(
    dataset = "D_apoe4v3_female",
    intercept = 1.06010,
    slope = -0.97379,
    dar_mean = mean(dar_tmp$dar_origin),
    dar_median = median(dar_tmp$dar_origin),
    dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
    dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
  ))
beta_fits_D$apoe4v3_female %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_D_apoe4v3_female_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_D_apoe4v3_female_norm <- res_D$apoe4v3_female %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_D$apoe4v3_female) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_D_apoe4v3_female_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_D_apoe4v3_female_norm$isDE)
## 
## FALSE  TRUE 
##    76   113
res_D$apoe4v3_female %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_D$apoe4v3_female) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_D_apoe4v3_female_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

APOE4v3 male

plotly::ggplotly({
  plotDarECDF(dar = dar_D$apoe4v3_male, dar_val = "origin")
})

Before normalisation of alpha

# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$apoe4v3_male) %>%
lm(shape1 ~ dar, data = beta_fits_D$apoe4v3_male) %>%
  step() %>%
  summary()
## Start:  AIC=-188.4
## shape1 ~ dar
## 
##        Df Sum of Sq      RSS    AIC
## <none>              0.011368 -188.4
## - dar   1  0.022646 0.034013 -163.0
## 
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_D$apoe4v3_male)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.03644 -0.01467 -0.00710  0.01875  0.05777 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.373438   0.005401  69.147  < 2e-16 ***
## dar         -0.258998   0.038262  -6.769 6.65e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02223 on 23 degrees of freedom
## Multiple R-squared:  0.6658, Adjusted R-squared:  0.6513 
## F-statistic: 45.82 on 1 and 23 DF,  p-value: 6.645e-07
lm_D_apoe4v3_male <- function(dar){
  0.339208 - 0.229183 * dar
}
beta_fits_D$apoe4v3_male %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_D_apoe4v3_male(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_D_apoe4v3_male <- res_D$apoe4v3_male %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_D$apoe4v3_male) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_D_apoe4v3_male(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_D_apoe4v3_male$isDE)
## 
## FALSE  TRUE 
##  2662     2
res_D$apoe4v3_male %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_D$apoe4v3_male) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_D_apoe4v3_male(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_D$apoe4v3_male) %>%
lm(shape1_norm ~ dar, data = beta_fits_D$apoe4v3_male) %>%
  step() %>%
  summary()
## Start:  AIC=-141.44
## shape1_norm ~ dar
## 
##        Df Sum of Sq     RSS     AIC
## <none>              0.07438 -141.44
## - dar   1   0.14818 0.22256 -116.04
## 
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_D$apoe4v3_male)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.09321 -0.03753 -0.01816  0.04796  0.14777 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.95524    0.01381  69.147  < 2e-16 ***
## dar         -0.66251    0.09787  -6.769 6.65e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05687 on 23 degrees of freedom
## Multiple R-squared:  0.6658, Adjusted R-squared:  0.6513 
## F-statistic: 45.82 on 1 and 23 DF,  p-value: 6.645e-07
lm_D_apoe4v3_male_norm <- function(dar){
  1.05298 - 0.71144 * dar
}
dar_tmp <- dar_D$apoe4v3_male
lm_all <- lm_all %>%
  rbind(tibble(
    dataset = "D_apoe4v3_male",
    intercept = 1.05298,
    slope = -0.71144,
    dar_mean = mean(dar_tmp$dar_origin),
    dar_median = median(dar_tmp$dar_origin),
    dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
    dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
  ))
beta_fits_D$apoe4v3_male %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_D_apoe4v3_male_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_D_apoe4v3_male_norm <- res_D$apoe4v3_male %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_D$apoe4v3_male) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_D_apoe4v3_male_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_D_apoe4v3_male_norm$isDE)
## 
## FALSE  TRUE 
##   182  2482
res_D$apoe4v3_male %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_D$apoe4v3_male) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_D_apoe4v3_male_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

Dataset E (zebrafish)

Load data

meta_E <- read_csv(here("data/metadata/pktu.csv")) %>%
  mutate(
    geno_psen1 = fct_relevel(geno_psen1, c("wt", "eofad", "fai")),
    geno_chr14 = fct_relevel(geno_chr14, c("tu", "pk", "het")),
    geno_joint = fct_relevel(geno_joint, unique(geno_joint))
  )
dar_E <- read_rds(here("data/dar/dar_fixed_E.Rds")) %>%
  sortSeqlevels() %>%
  endoapply(sort)
gene_dar_E <- read_rds("data/dar/gene_dar_fixed_E.Rds") %>%
  sortSeqlevels() %>%
  endoapply(sort)
dge_E <- read_rds("data/de/dgeList_pktu.Rds")
dge_E <- DGEList(
  counts = dge_E$counts,
  samples = dge_E$samples,
  genes = dge_E$genes
)
contrasts_E1 <- makeContrasts(
  eofad_wt = eofad - wt,
  fai_wt = fai - wt,
  eofad_fai = eofad - fai,
  levels = dge_E$samples$geno_psen1
)
contrasts_E2 <- makeContrasts(
  pk_tu = pk - tu,
  pk_het = pk - het,
  tu_het = tu - het,
  levels = dge_E$samples$geno_chr14
)
## Rerun the analysis using glmQL-Fits
res_E <- lapply(colnames(contrasts_E1), function(x){
  design <- model.matrix(~0 + geno_psen1, data = dge_E$samples) %>%
    set_colnames(str_remove(colnames(.), "geno_psen1"))
  dge_E %>%
    estimateDisp(design = design) %>%
    glmQLFit() %>%
    glmQLFTest(contrast = contrasts_E1[,x]) %>%
    topTags(n = Inf) %>%
    .[["table"]] %>%
    makeGRangesFromDataFrame(keep.extra.columns = TRUE) %>%
    sortSeqlevels() %>%
    sort()
}) %>%
  set_names(colnames(contrasts_E1))
res_E2 <- lapply(colnames(contrasts_E2), function(x){
  design <- model.matrix(~0 + geno_chr14, data = dge_E$samples) %>%
    set_colnames(str_remove(colnames(.), "geno_chr14"))
  dge_E %>%
    estimateDisp(design = design) %>%
    glmQLFit() %>%
    glmQLFTest(contrast = contrasts_E2[,x]) %>%
    topTags(n = Inf) %>%
    .[["table"]] %>%
    makeGRangesFromDataFrame(keep.extra.columns = TRUE) %>%
    sortSeqlevels() %>%
    sort()
}) %>%
  set_names(colnames(contrasts_E2))
res_E$pk_tu <- res_E2$pk_tu
res_E$pk_het <- res_E2$pk_het
res_E$tu_het <- res_E2$tu_het

Exploration

lapply(names(res_E), function(x){
  res_E[[x]] %>%
    mcols() %>%
    as_tibble() %>%
    dplyr::select(
      starts_with("gene"), starts_with("log"), PValue, FDR
    ) %>%
    left_join(
      mcols(gene_dar_E[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
    ) %>%
    ggplot(aes(dar, -log10(PValue))) +
    geom_point() +
    labs(title = x)
})

lapply(names(res_E), function(x){
  res_E[[x]] %>%
    mcols() %>%
    as_tibble() %>%
    dplyr::select(
      starts_with("gene"), starts_with("log"), PValue, FDR
    ) %>%
    left_join(
      mcols(gene_dar_E[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
    ) %>%
    mutate(
      dar_q = cut(
        dar,
        breaks = quantile(dar, probs = seq(0, 1, length.out = 5), na.rm = TRUE),
        # breaks = seq(0, 1, length.out = 7),
        include.lowest = TRUE
      ) %>%
        fct_na_value_to_level(levels(.)[[1]])
    ) %>%
    ggplot(aes(PValue, after_stat(density))) +
    geom_histogram(fill = "grey70", colour = "black", binwidth = 0.01) +
    geom_line(
      ## Add 1 - dar^2 in red
      aes(y = y, colour = f),
      data = . %>%
        pull(dar_q) %>%
        levels() %>%
        sapply(
          \(x) {
            min_dar = str_replace_all(x, "^[^0-9]([0-9\\.]+),.+", "\\1") %>% as.numeric()
            tibble(
              PValue = seq(0.01, 1, by = 0.01),
              `1 - DAR` = dbeta(PValue, 1 - min_dar, 1),
              `1 - DAR^2` = dbeta(PValue, 1 - min_dar^2, 1),
              `sqrt(1 - DAR)` = dbeta(PValue, sqrt(1 - min_dar), 1),
            )
          },
          simplify = FALSE
        ) %>%
        bind_rows(.id = "dar_q") %>%
        pivot_longer(
          cols = contains("- DAR"), names_to = "f", values_to = "y"
        ) %>%
        mutate(dar_q = fct_inorder(dar_q)),
    ) +
    geom_label(
      aes(x = 1, y = 0, label = label),
      data = . %>%
        summarise(n = dplyr::n(), .by = dar_q) %>%
        mutate(label = paste("n =", comma(n))),
      hjust = 1, vjust = 0, alpha = 0.7
    ) +
    facet_wrap(~dar_q, scales = "free_y") +
    labs(title = x)
})

beta_fits_E <- sapply(names(res_E), simplify = FALSE, function(x){
  res_E[[x]] %>%
    as_tibble() %>%
    dplyr::select(
      range, starts_with("gene"), starts_with("log"), PValue, FDR
    ) %>%
    left_join(
      mcols(gene_dar_E[[x]]) %>% as_tibble() %>% dplyr::select(gene_id, dar)
    ) %>%
    dplyr::filter(!is.na(dar)) %>%
    # mutate(dar_q = cut(dar, seq(0, 1, length.out = 5))) %>%
    mutate(
      dar_q = cut(
        dar,
        unique(quantile(dar, probs = seq(0, 1, length.out = 26))),
        include.lowest = TRUE
      )
    ) %>%
    split(.$dar_q) %>%
    lapply(
      \(x) {
        tryCatch({  # !!
          fit <- MASS::fitdistr(x$PValue, "beta", list(shape1 = 1, shape2 = 1))
          tibble(
            dar = median(x$dar),
            shape1 = fit$estimate["shape1"],
            se1 = fit$sd["shape1"],
            shape2 = fit$estimate["shape2"],
            se2 = fit$sd["shape2"]
          )
        }, error = \(e){
          tibble(
            dar = median(x$dar), shape1 = NA, se1 = NA, shape2 = NA, se2 = NA
          )
        })
      }
    ) %>%
    bind_rows() %>%
    mutate(shape1_norm = shape1 / max(shape1, na.rm = TRUE))
})
sapply(beta_fits_E, simplify = FALSE, function(x){
  as.data.frame(x)
})
## $eofad_wt
##           dar    shape1        se1    shape2        se2 shape1_norm
## 1  0.00000000 1.0047821 0.02462011 0.9957088 0.02435312   0.7373614
## 2  0.01625964 1.1518700 0.05706128 1.0153419 0.04909292   0.8453021
## 3  0.02777778 1.2490637 0.06270209 1.0721270 0.05233116   0.9166279
## 4  0.03612900 1.1984385 0.05988093 1.0181983 0.04932720   0.8794765
## 5  0.04225415 1.2455501 0.06228706 1.0628240 0.05161705   0.9140494
## 6  0.04861111 1.2523257 0.06281742 1.0242350 0.04948565   0.9190217
## 7  0.05555556 1.2267509 0.05277443 1.1087697 0.04682630   0.9002536
## 8  0.05698724 1.3626725 0.08387104 1.1790374 0.07078243   1.0000000
## 9  0.06349206 1.2085487 0.05972895 1.1004127 0.05345464   0.8868959
## 10 0.07042187 1.2510050 0.06160070 1.1101522 0.05349386   0.9180525
## 11 0.07733962 1.1852538 0.05961707 1.1548787 0.05781519   0.8698009
## 12 0.08333333 1.2687521 0.06306519 1.1919307 0.05859943   0.9310763
## 13 0.09259259 1.2203609 0.06078018 1.0763801 0.05237954   0.8955643
## 14 0.10040293 1.1285802 0.05591439 1.0357944 0.05047820   0.8282109
## 15 0.11111111 1.1793361 0.05567982 1.1167343 0.05219602   0.8654582
## 16 0.11728395 1.1332346 0.05900378 1.0782800 0.05561652   0.8316265
## 17 0.12829936 1.0456292 0.05134791 1.0330387 0.05060860   0.7673371
## 18 0.14177395 0.9541102 0.04642553 1.0078933 0.04959575   0.7001757
## 19 0.15912004 1.0564019 0.05071817 1.0229518 0.04880167   0.7752427
## 20 0.17876292 0.9518843 0.04727065 1.0184969 0.05128154   0.6985422
## 21 0.21382120 0.8762720 0.04229303 1.0343877 0.05168071   0.6430540
## 22 0.28132184 0.6687867 0.03133762 0.9083512 0.04571536   0.4907905
## 
## $fai_wt
##            dar    shape1        se1   shape2        se2 shape1_norm
## 1  0.000000000 1.0469079 0.02971339 1.018230 0.02874095   0.7580235
## 2  0.007936508 1.2741840 0.06358669 1.126344 0.05498011   0.9225850
## 3  0.018518519 1.2510786 0.06262316 1.057093 0.05129386   0.9058553
## 4  0.027777778 1.2116317 0.06044033 1.045319 0.05072271   0.8772934
## 5  0.035000200 1.2098452 0.05989886 1.096743 0.05332656   0.8759998
## 6  0.040380915 1.2964460 0.06495358 1.170199 0.05758249   0.9387040
## 7  0.044894406 1.2156032 0.06070682 1.036019 0.05021018   0.8801690
## 8  0.050000000 1.2723038 0.06352781 1.116020 0.05442578   0.9212236
## 9  0.055555556 1.1408010 0.04708284 1.057949 0.04304185   0.8260077
## 10 0.056172837 1.3811021 0.09259679 1.174494 0.07658527   1.0000000
## 11 0.061111111 1.3202811 0.06621206 1.112198 0.05410094   0.9559620
## 12 0.066666667 1.2582029 0.06264680 1.128821 0.05511841   0.9110137
## 13 0.072649573 1.2437282 0.06220366 1.059093 0.05142014   0.9005331
## 14 0.077777778 1.2937477 0.06479675 1.108874 0.05402300   0.9367503
## 15 0.083357570 1.2694053 0.06295593 1.207309 0.05935297   0.9191249
## 16 0.090908634 1.2139036 0.06023975 1.129279 0.05530777   0.8789384
## 17 0.098189487 1.2682216 0.06340682 1.092757 0.05317752   0.9182678
## 18 0.107077273 1.1505282 0.05492862 1.030953 0.04819736   0.8330508
## 19 0.112906491 1.1743535 0.06068854 1.111872 0.05688093   0.8503017
## 20 0.125062524 1.2530417 0.06234553 1.143131 0.05594826   0.9072767
## 21 0.139833711 1.0786157 0.05286772 1.112166 0.05483119   0.7809819
## 22 0.163250699 1.0311892 0.05039802 1.076194 0.05303857   0.7466423
## 23 0.208225108 0.8960612 0.04304755 1.071372 0.05338824   0.6488015
## 
## $eofad_fai
##           dar    shape1        se1    shape2        se2 shape1_norm
## 1  0.00000000 0.8084209 0.01927844 0.9992228 0.02494300   0.8271861
## 2  0.01851852 0.8726393 0.04187817 1.0282355 0.05106719   0.8928951
## 3  0.02777778 0.8845877 0.04237726 1.1153304 0.05600822   0.9051208
## 4  0.03688241 0.8476469 0.04063253 0.9754874 0.04818852   0.8673226
## 5  0.04285196 0.9773145 0.04718509 1.2290989 0.06199313   1.0000000
## 6  0.04879444 0.8382336 0.04003188 1.0481911 0.05247057   0.8576907
## 7  0.05555556 0.9330994 0.03628468 1.0782735 0.04317148   0.9547585
## 8  0.05603233 0.8437460 0.05947626 1.0787835 0.08003883   0.8633311
## 9  0.06126237 0.8763696 0.04184610 1.1417479 0.05751645   0.8967119
## 10 0.06746032 0.9373684 0.04511823 1.1285163 0.05635903   0.9591267
## 11 0.07407407 0.9034084 0.04333824 1.1353353 0.05702083   0.9243784
## 12 0.08055556 0.8356628 0.03865868 1.0519301 0.05107898   0.8550603
## 13 0.08731676 0.8559504 0.04229778 1.0751316 0.05570392   0.8758188
## 14 0.09523810 0.8279808 0.03928408 1.0618428 0.05308869   0.8472000
## 15 0.10442850 0.8959026 0.04299995 1.1597716 0.05861420   0.9166983
## 16 0.11111111 0.8507968 0.04052905 1.1205708 0.05648864   0.8705456
## 17 0.12158549 0.8113813 0.03870612 1.0052065 0.05021386   0.8302152
## 18 0.13587208 0.8670064 0.04114675 1.2108588 0.06142758   0.8871314
## 19 0.15277778 0.7574677 0.03581925 1.0300559 0.05208794   0.7750501
## 20 0.17375067 0.7899069 0.03733063 1.1031795 0.05594983   0.8082422
## 21 0.21392121 0.6854525 0.03189870 1.0713914 0.05506833   0.7013632
## 22 0.28488870 0.5294518 0.02404602 0.9361927 0.04893386   0.5417415
## 
## $pk_tu
##           dar    shape1        se1    shape2        se2 shape1_norm
## 1  0.00000000 1.0773128 0.03086767 0.9484855 0.02649032   0.6546973
## 2  0.01111111 1.4150964 0.07252230 0.9527728 0.04540179   0.8599728
## 3  0.02464971 1.4280533 0.07255585 1.0553602 0.05083222   0.8678469
## 4  0.03395062 1.4353703 0.07278655 0.9908302 0.04699352   0.8722935
## 5  0.04073720 1.4306315 0.07385398 0.9763828 0.04702504   0.8694137
## 6  0.04629630 1.5231175 0.07805774 1.0362229 0.04966332   0.9256186
## 7  0.05235744 1.6455130 0.08378062 1.1106194 0.05299824   1.0000000
## 8  0.05555556 1.3720253 0.06806620 1.0187923 0.04788702   0.8337979
## 9  0.05934014 1.4385896 0.07557485 1.0458324 0.05193154   0.8742499
## 10 0.06481481 1.5494599 0.07909549 1.0959460 0.05276063   0.9416273
## 11 0.07017508 1.4391622 0.07332345 1.0410079 0.05008302   0.8745979
## 12 0.07540643 1.4253446 0.07235598 1.0632021 0.05125567   0.8662008
## 13 0.08103175 1.4432688 0.07190194 1.1001298 0.05228996   0.8770936
## 14 0.08671745 1.5882392 0.08238948 1.1626220 0.05730440   0.9651940
## 15 0.09338779 1.4411719 0.07250464 1.1321597 0.05465050   0.8758192
## 16 0.10136311 1.3331719 0.06776312 1.0110395 0.04884608   0.8101862
## 17 0.11111111 1.3357492 0.06700176 1.1272979 0.05487961   0.8117524
## 18 0.11952862 1.2850686 0.06479350 1.0070803 0.04852625   0.7809532
## 19 0.13333333 1.2089924 0.06051498 1.0095087 0.04883168   0.7347207
## 20 0.15091308 1.2119697 0.06041727 1.0631501 0.05172224   0.7365300
## 21 0.16861894 1.1181113 0.05535831 1.0304128 0.05021790   0.6794910
## 22 0.20753296 0.9203194 0.04472274 0.9682561 0.04755503   0.5592903
## 23 0.30032918 0.5632364 0.02596805 0.8364516 0.04254534   0.3422862
## 
## $pk_het
##           dar    shape1        se1    shape2        se2 shape1_norm
## 1  0.00000000 0.8912718 0.02161854 0.9348192 0.02290699   0.6410762
## 2  0.01614621 1.1248307 0.05629911 0.9253306 0.04454259   0.8090710
## 3  0.02777778 1.2304786 0.06190328 0.9688183 0.04656147   0.8850617
## 4  0.03640559 1.3139010 0.06560835 1.1262072 0.05471954   0.9450659
## 5  0.04320988 1.2029304 0.06063344 0.9574647 0.04617708   0.8652468
## 6  0.04976852 1.3902744 0.06986950 1.1735817 0.05729273   1.0000000
## 7  0.05555556 1.0996914 0.05167694 0.9401902 0.04281786   0.7909887
## 8  0.05836165 1.2404415 0.06638303 1.0316688 0.05334417   0.8922278
## 9  0.06609206 1.1638645 0.05799061 1.0106957 0.04901372   0.8371473
## 10 0.07407407 1.1412827 0.05664684 1.0294432 0.05009448   0.8209046
## 11 0.08333333 1.1323193 0.05620705 0.9920483 0.04799182   0.8144574
## 12 0.09090909 1.0355725 0.05105646 0.9947404 0.04864996   0.7448691
## 13 0.10000000 1.0428104 0.05104118 1.0446927 0.05115145   0.7500752
## 14 0.11111111 1.0310880 0.05090059 0.9497071 0.04610555   0.7416435
## 15 0.11799343 1.0661521 0.05283077 0.9688915 0.04710118   0.7668645
## 16 0.13080050 1.0461851 0.05141327 1.0242166 0.05012294   0.7525026
## 17 0.14692665 0.9265272 0.04518503 0.8993777 0.04358334   0.6664347
## 18 0.16626068 0.9413272 0.04598480 0.9641922 0.04733770   0.6770801
## 19 0.18612056 0.9401167 0.04570278 0.9962492 0.04901428   0.6762094
## 20 0.21851852 0.7778995 0.03707980 0.9627871 0.04810116   0.5595294
## 21 0.26147247 0.7082393 0.03342675 0.9186427 0.04601960   0.5094241
## 22 0.33333333 0.6076665 0.02815911 0.8938747 0.04544650   0.4370839
## 
## $tu_het
##           dar    shape1        se1    shape2        se2 shape1_norm
## 1  0.00000000 1.1413357 0.02850221 0.9505076 0.02289607   0.6816328
## 2  0.01525054 1.5593133 0.08030164 1.0184672 0.04870769   0.9312589
## 3  0.02620299 1.5689669 0.08090025 1.0195926 0.04878890   0.9370243
## 4  0.03388598 1.4185959 0.06894206 0.9738205 0.04418918   0.8472192
## 5  0.04006574 1.6744143 0.09148042 1.0702008 0.05427509   1.0000000
## 6  0.04533572 1.5146482 0.07780080 0.9980350 0.04761966   0.9045839
## 7  0.05148698 1.4501633 0.07386391 0.9716071 0.04602504   0.8660720
## 8  0.05555556 1.3458248 0.06429094 0.9925739 0.04483005   0.8037585
## 9  0.05826801 1.3866646 0.07658300 0.9526025 0.04907719   0.8281490
## 10 0.06491879 1.4460038 0.07409663 0.9776776 0.04667364   0.8635878
## 11 0.07205237 1.4437958 0.07387654 0.9905651 0.04735976   0.8622691
## 12 0.07934291 1.3425027 0.06658609 0.9944807 0.04667221   0.8017745
## 13 0.08607712 1.4704006 0.07695645 1.0233080 0.05023085   0.8781582
## 14 0.09409722 1.3043927 0.06577441 1.0173529 0.04900182   0.7790143
## 15 0.10213350 1.3957085 0.07094233 1.0213835 0.04907687   0.8335503
## 16 0.11111111 1.3070458 0.06608986 0.9677725 0.04625162   0.7805988
## 17 0.11754121 1.3060627 0.06654687 0.9466928 0.04538753   0.7800117
## 18 0.12962963 1.1846363 0.05928188 0.9850551 0.04757482   0.7074930
## 19 0.14444444 1.1155891 0.05540726 0.9813051 0.04752546   0.6662563
## 20 0.16666667 1.1546807 0.05769642 0.9756571 0.04717827   0.6896027
## 21 0.19739514 0.9138406 0.04380057 0.9413325 0.04540206   0.5457673
## 22 0.26475376 0.8932055 0.04369817 1.0272942 0.05173889   0.5334436

EOfAD vs WT

plotly::ggplotly({
  plotDarECDF(dar = dar_E$eofad_wt, dar_val = "origin")
})

Before normalisation of alpha

# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$eofad_wt) %>%
lm(shape1 ~ dar, data = beta_fits_E$eofad_wt) %>%
  step() %>%
  summary()
## Start:  AIC=-99.63
## shape1 ~ dar
## 
##        Df Sum of Sq     RSS     AIC
## <none>              0.19798 -99.634
## - dar   1    0.3348 0.53278 -79.855
## 
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_E$eofad_wt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30073 -0.02983  0.01965  0.04595  0.16290 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.30552    0.03715  35.144  < 2e-16 ***
## dar         -1.85562    0.31907  -5.816 1.09e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09949 on 20 degrees of freedom
## Multiple R-squared:  0.6284, Adjusted R-squared:  0.6098 
## F-statistic: 33.82 on 1 and 20 DF,  p-value: 1.088e-05
lm_E_eofad_wt <- function(dar){
  1.30552 - 1.85562 * dar
}
beta_fits_E$eofad_wt %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_E_eofad_wt(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_E_eofad_wt <- res_E$eofad_wt %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$eofad_wt) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_eofad_wt(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_E_eofad_wt$isDE)
## 
## FALSE  TRUE 
##     1     4
res_E$eofad_wt %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$eofad_wt) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_eofad_wt(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_E$eofad_wt) %>%
lm(shape1_norm ~ dar, data = beta_fits_E$eofad_wt) %>%
  step() %>%
  summary()
## Start:  AIC=-113.25
## shape1_norm ~ dar
## 
##        Df Sum of Sq     RSS      AIC
## <none>              0.10662 -113.250
## - dar   1    0.1803 0.28692  -93.471
## 
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_E$eofad_wt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.22069 -0.02189  0.01442  0.03372  0.11955 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.95805    0.02726  35.144  < 2e-16 ***
## dar         -1.36175    0.23415  -5.816 1.09e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07301 on 20 degrees of freedom
## Multiple R-squared:  0.6284, Adjusted R-squared:  0.6098 
## F-statistic: 33.82 on 1 and 20 DF,  p-value: 1.088e-05
lm_E_eofad_wt_norm <- function(dar){
  1.10422 - 1.56951 * dar
}
dar_tmp <- dar_E$eofad_wt
lm_all <- lm_all %>%
  rbind(tibble(
    dataset = "E_eofad_wt",
    intercept = 1.10422,
    slope = -1.56951,
    dar_mean = mean(dar_tmp$dar_origin),
    dar_median = median(dar_tmp$dar_origin),
    dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
    dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
  ))
beta_fits_E$eofad_wt %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_E_eofad_wt_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_E_eofad_wt_norm <- res_E$eofad_wt %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$eofad_wt) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_eofad_wt_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_E_eofad_wt_norm$isDE)
## 
## FALSE  TRUE 
##     4     1
res_E$eofad_wt %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$eofad_wt) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_eofad_wt_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

fAI vs WT

plotly::ggplotly({
  plotDarECDF(dar = dar_E$fai_wt, dar_val = "origin")
})

Before normalisation of alpha

# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$fai_wt) %>%
lm(shape1 ~ dar, data = beta_fits_E$fai_wt) %>%
  step() %>%
  summary()
## Start:  AIC=-107.63
## shape1 ~ dar
## 
##        Df Sum of Sq     RSS     AIC
## <none>              0.17946 -107.63
## - dar   1  0.082155 0.26161 -100.96
## 
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_E$fai_wt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.25111 -0.04832  0.01256  0.05955  0.15086 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.29802    0.03523  36.841  < 2e-16 ***
## dar         -1.20650    0.38912  -3.101  0.00542 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09244 on 21 degrees of freedom
## Multiple R-squared:  0.314,  Adjusted R-squared:  0.2814 
## F-statistic: 9.614 on 1 and 21 DF,  p-value: 0.005415
lm_E_fai_wt <- function(dar){
  1.29802 - 1.20650 * dar
}
beta_fits_E$fai_wt %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_E_fai_wt(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_E_fai_wt <- res_E$fai_wt %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$fai_wt) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_fai_wt(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_E_fai_wt$isDE)
## 
## TRUE 
##    2
res_E$fai_wt %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$fai_wt) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_fai_wt(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_E$fai_wt) %>%
lm(shape1_norm ~ dar, data = beta_fits_E$fai_wt) %>%
  step() %>%
  summary()
## Start:  AIC=-122.48
## shape1_norm ~ dar
## 
##        Df Sum of Sq      RSS     AIC
## <none>              0.094083 -122.48
## - dar   1  0.043071 0.137154 -115.81
## 
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_E$fai_wt)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.181819 -0.034986  0.009092  0.043120  0.109229 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.93984    0.02551  36.841  < 2e-16 ***
## dar         -0.87358    0.28174  -3.101  0.00542 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06693 on 21 degrees of freedom
## Multiple R-squared:  0.314,  Adjusted R-squared:  0.2814 
## F-statistic: 9.614 on 1 and 21 DF,  p-value: 0.005415
lm_E_fai_wt_norm <- function(dar){
  1.04365 - 0.97007 * dar
}
dar_tmp <- dar_E$fai_wt
lm_all <- lm_all %>%
  rbind(tibble(
    dataset = "E_fai_wt",
    intercept = 1.04365,
    slope = -0.97007,
    dar_mean = mean(dar_tmp$dar_origin),
    dar_median = median(dar_tmp$dar_origin),
    dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
    dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
  ))
beta_fits_E$fai_wt %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_E_fai_wt_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_E_fai_wt_norm <- res_E$fai_wt %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$fai_wt) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_fai_wt_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_E_fai_wt_norm$isDE)
## 
## TRUE 
##    2
res_E$fai_wt %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$fai_wt) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_fai_wt_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

EOfAD vs fAI

plotly::ggplotly({
  plotDarECDF(dar = dar_E$eofad_fai, dar_val = "origin")
})

Before normalisation of alpha

# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$eofad_fai) %>%
lm(shape1 ~ dar, data = beta_fits_E$eofad_fai) %>%
  step() %>%
  summary()
## Start:  AIC=-123.25
## shape1 ~ dar
## 
##        Df Sum of Sq      RSS     AIC
## <none>              0.067664 -123.25
## - dar   1   0.11355 0.181212 -103.58
## 
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_E$eofad_fai)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.13083 -0.03195 -0.00184  0.04361  0.08474 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.93925    0.02147  43.748  < 2e-16 ***
## dar         -1.08928    0.18802  -5.793 1.14e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05817 on 20 degrees of freedom
## Multiple R-squared:  0.6266, Adjusted R-squared:  0.6079 
## F-statistic: 33.56 on 1 and 20 DF,  p-value: 1.143e-05
lm_E_eofad_fai <- function(dar){
  0.93925 - 1.08928 * dar
}
beta_fits_E$eofad_fai %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_E_eofad_fai(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_E_eofad_fai <- res_E$eofad_fai %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$eofad_fai) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_eofad_fai(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_E_eofad_fai$isDE)
## 
## FALSE  TRUE 
##     9     5
res_E$eofad_fai %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$eofad_fai) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_eofad_fai(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_E$eofad_fai) %>%
lm(shape1_norm ~ dar, data = beta_fits_E$eofad_fai) %>%
  step() %>%
  summary()
## Start:  AIC=-122.24
## shape1_norm ~ dar
## 
##        Df Sum of Sq      RSS     AIC
## <none>              0.070842 -122.24
## - dar   1   0.11888 0.189722 -102.57
## 
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_E$eofad_fai)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.133868 -0.032696 -0.001883  0.044625  0.086708 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.96105    0.02197  43.748  < 2e-16 ***
## dar         -1.11457    0.19239  -5.793 1.14e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05952 on 20 degrees of freedom
## Multiple R-squared:  0.6266, Adjusted R-squared:  0.6079 
## F-statistic: 33.56 on 1 and 20 DF,  p-value: 1.143e-05
lm_E_eofad_fai_norm <- function(dar){
  1.10601 - 1.28268 * dar
}
dar_tmp <- dar_E$eofad_fai
lm_all <- lm_all %>%
  rbind(tibble(
    dataset = "E_eofad_fai",
    intercept = 1.10601,
    slope = -1.28268,
    dar_mean = mean(dar_tmp$dar_origin),
    dar_median = median(dar_tmp$dar_origin),
    dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
    dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
  ))
beta_fits_E$eofad_fai %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_E_eofad_fai_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_E_eofad_fai_norm <- res_E$eofad_fai %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$eofad_fai) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_eofad_fai_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_E_eofad_fai_norm$isDE)
## 
## FALSE  TRUE 
##     8     6
res_E$eofad_fai %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$eofad_fai) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_eofad_fai_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

PK vs Tu

plotly::ggplotly({
  plotDarECDF(dar = dar_E$pk_tu, dar_val = "origin")
})

Before normalisation of alpha

# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$pk_tu) %>%
lm(shape1 ~ dar, data = beta_fits_E$pk_tu) %>%
  step() %>%
  summary()
## Start:  AIC=-84.24
## shape1 ~ dar
## 
##        Df Sum of Sq     RSS     AIC
## <none>              0.49598 -84.244
## - dar   1   0.74863 1.24461 -65.083
## 
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_E$pk_tu)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.50010 -0.05260  0.03008  0.06502  0.24551 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.57741    0.05415   29.13  < 2e-16 ***
## dar         -2.70633    0.48070   -5.63 1.38e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1537 on 21 degrees of freedom
## Multiple R-squared:  0.6015, Adjusted R-squared:  0.5825 
## F-statistic:  31.7 on 1 and 21 DF,  p-value: 1.377e-05
lm_E_pk_tu <- function(dar){
  1.57741 - 2.70633 * dar
}
beta_fits_E$pk_tu %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_E_pk_tu(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_E_pk_tu <- res_E$pk_tu %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$pk_tu) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_pk_tu(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_E_pk_tu$isDE)
## 
## FALSE  TRUE 
##     5     2
res_E$pk_tu %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$pk_tu) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_pk_tu(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_E$pk_tu) %>%
lm(shape1_norm ~ dar, data = beta_fits_E$pk_tu) %>%
  step() %>%
  summary()
## Start:  AIC=-107.15
## shape1_norm ~ dar
## 
##        Df Sum of Sq     RSS      AIC
## <none>              0.18317 -107.155
## - dar   1   0.27648 0.45966  -87.994
## 
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_E$pk_tu)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30392 -0.03197  0.01828  0.03951  0.14920 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.9586     0.0329   29.13  < 2e-16 ***
## dar          -1.6447     0.2921   -5.63 1.38e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09339 on 21 degrees of freedom
## Multiple R-squared:  0.6015, Adjusted R-squared:  0.5825 
## F-statistic:  31.7 on 1 and 21 DF,  p-value: 1.377e-05
lm_E_pk_tu_norm <- function(dar){
  1.10669 - 1.89872 * dar
}
dar_tmp <- dar_E$pk_tu
lm_all <- lm_all %>%
  rbind(tibble(
    dataset = "E_pk_tu",
    intercept = 1.10669,
    slope = -1.89872,
    dar_mean = mean(dar_tmp$dar_origin),
    dar_median = median(dar_tmp$dar_origin),
    dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
    dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
  ))
beta_fits_E$pk_tu %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_E_pk_tu_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_E_pk_tu_norm <- res_E$pk_tu %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$pk_tu) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_pk_tu_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_E_pk_tu_norm$isDE)
## 
## FALSE  TRUE 
##     6     1
res_E$pk_tu %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$pk_tu) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_pk_tu_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

PK vs het

plotly::ggplotly({
  plotDarECDF(dar = dar_E$pk_het, dar_val = "origin")
})

Before normalisation of alpha

# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$pk_het) %>%
lm(shape1 ~ dar, data = beta_fits_E$pk_het) %>%
  step() %>%
  summary()
## Start:  AIC=-95.03
## shape1 ~ dar
## 
##        Df Sum of Sq     RSS     AIC
## <none>              0.24408 -95.029
## - dar   1   0.52135 0.76542 -71.884
## 
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_E$pk_het)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36024 -0.04638  0.01646  0.03780  0.23264 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.25151    0.03905  32.051  < 2e-16 ***
## dar         -1.88631    0.28860  -6.536 2.27e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1105 on 20 degrees of freedom
## Multiple R-squared:  0.6811, Adjusted R-squared:  0.6652 
## F-statistic: 42.72 on 1 and 20 DF,  p-value: 2.274e-06
lm_E_pk_het <- function(dar){
  1.25151 - 1.88631 * dar
}
beta_fits_E$pk_het %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_E_pk_het(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_E_pk_het <- res_E$pk_het %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$pk_het) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_pk_het(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_E_pk_het$isDE)
## 
## FALSE  TRUE 
##     2     2
res_E$pk_het %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$pk_het) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_pk_het(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_E$pk_het) %>%
lm(shape1_norm ~ dar, data = beta_fits_E$pk_het) %>%
  step() %>%
  summary()
## Start:  AIC=-109.53
## shape1_norm ~ dar
## 
##        Df Sum of Sq     RSS      AIC
## <none>              0.12628 -109.527
## - dar   1   0.26973 0.39601  -86.382
## 
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_E$pk_het)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.25911 -0.03336  0.01184  0.02719  0.16733 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.90019    0.02809  32.051  < 2e-16 ***
## dar         -1.35679    0.20758  -6.536 2.27e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07946 on 20 degrees of freedom
## Multiple R-squared:  0.6811, Adjusted R-squared:  0.6652 
## F-statistic: 42.72 on 1 and 20 DF,  p-value: 2.274e-06
lm_E_pk_het_norm <- function(dar){
  1.18496 - 1.78599 * dar
}
dar_tmp <- dar_E$pk_het
lm_all <- lm_all %>%
  rbind(tibble(
    dataset = "E_pk_het",
    intercept = 1.18496,
    slope = -1.78599,
    dar_mean = mean(dar_tmp$dar_origin),
    dar_median = median(dar_tmp$dar_origin),
    dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
    dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
  ))
beta_fits_E$pk_het %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_E_pk_het_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_E_pk_het_norm <- res_E$pk_het %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$pk_het) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_pk_het_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_E_pk_het_norm$isDE)
## 
## FALSE  TRUE 
##     2     2
res_E$pk_het %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$pk_het) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_pk_het_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

Tu vs het

plotly::ggplotly({
  plotDarECDF(dar = dar_E$tu_het, dar_val = "origin")
})

Before normalisation of alpha

# lm(shape1 ~ dar + I(dar^2), data = beta_fits_D$tu_het) %>%
lm(shape1 ~ dar, data = beta_fits_E$tu_het) %>%
  step() %>%
  summary()
## Start:  AIC=-89.84
## shape1 ~ dar
## 
##        Df Sum of Sq     RSS     AIC
## <none>              0.30901 -89.839
## - dar   1   0.52863 0.83764 -69.901
## 
## Call:
## lm(formula = shape1 ~ dar, data = beta_fits_E$tu_het)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41529 -0.04101  0.01946  0.06402  0.21831 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.55663    0.04644  33.517  < 2e-16 ***
## dar         -2.50894    0.42893  -5.849 1.01e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1243 on 20 degrees of freedom
## Multiple R-squared:  0.6311, Adjusted R-squared:  0.6126 
## F-statistic: 34.21 on 1 and 20 DF,  p-value: 1.01e-05
lm_E_tu_het <- function(dar){
  1.55663 - 2.50894 * dar
}
beta_fits_E$tu_het %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_E_tu_het(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1)) +
  geom_point(
    data = . %>% distinct(dar, shape1)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_E_tu_het <- res_E$tu_het %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$tu_het) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_tu_het(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_E_tu_het$isDE)
## < table of extent 0 >
res_E$tu_het %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$tu_het) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_tu_het(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

After normalisation of alpha

# lm(shape1_norm ~ dar + I(dar^2), data = beta_fits_E$tu_het) %>%
lm(shape1_norm ~ dar, data = beta_fits_E$tu_het) %>%
  step() %>%
  summary()
## Start:  AIC=-112.52
## shape1_norm ~ dar
## 
##        Df Sum of Sq     RSS      AIC
## <none>              0.11022 -112.520
## - dar   1   0.18855 0.29877  -92.581
## 
## Call:
## lm(formula = shape1_norm ~ dar, data = beta_fits_E$tu_het)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.24802 -0.02449  0.01162  0.03823  0.13038 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.92966    0.02774  33.517  < 2e-16 ***
## dar         -1.49840    0.25617  -5.849 1.01e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07424 on 20 degrees of freedom
## Multiple R-squared:  0.6311, Adjusted R-squared:  0.6126 
## F-statistic: 34.21 on 1 and 20 DF,  p-value: 1.01e-05
lm_E_tu_het_norm <- function(dar){
  1.13935 - 1.83637 * dar
}
dar_tmp <- dar_E$tu_het
lm_all <- lm_all %>%
  rbind(tibble(
    dataset = "E_tu_het",
    intercept = 1.13935,
    slope = -1.83637,
    dar_mean = mean(dar_tmp$dar_origin),
    dar_median = median(dar_tmp$dar_origin),
    dar_mean_0 = mean(dar_tmp$dar_origin[dar_tmp$dar_origin != 0]),
    dar_median_0 = median(dar_tmp$dar_origin[dar_tmp$dar_origin != 0])
  ))
beta_fits_E$tu_het %>%
  mutate(
    `1 - DAR` = 1  - dar,
    `sqrt(1 - DAR)` = sqrt(1 - dar),
    `(1.1 - DAR)^2` = (1.1 - dar)^2,
    `DAR lm` = lm_E_tu_het_norm(dar)
  ) %>%
  pivot_longer(cols = matches("DAR", FALSE), names_to = "f", values_to = "estimate") %>%
  mutate(estimate = ifelse(estimate > 1, 1, estimate)) %>%
  ggplot(aes(dar, shape1_norm)) +
  geom_point(
    data = . %>% distinct(dar, shape1_norm)
  ) +
  geom_line(aes(y = estimate, colour = f))

darP_E_tu_het_norm <- res_E$tu_het %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$tu_het) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_tu_het_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1),
    FDR_darP = p.adjust(darP, "fdr"),
    wasDE = FDR < 0.05,
    isDE = FDR_darP < 0.05
  ) %>%
  dplyr::filter(wasDE | isDE) %>%
  dplyr::arrange(!isDE, darP) %>%
  as.data.frame()
table(darP_E_tu_het_norm$isDE)
## < table of extent 0 >
res_E$tu_het %>%
  as_tibble() %>%
  dplyr::select(
    range, starts_with("gene"), starts_with("log"), PValue, FDR
  ) %>%
  left_join(
    mcols(gene_dar_E$tu_het) %>% as_tibble() %>% dplyr::select(gene_id, dar)
  ) %>%
  mutate(
    dar = ifelse(is.na(dar), 0, dar),
    alpha = lm_E_tu_het_norm(dar),
    alpha = ifelse(alpha > 1, 1, alpha),
    alpha = ifelse(alpha < 0.1, 0.1, alpha),
    darP = pbeta(PValue, alpha, 1)
  ) %>%
  pivot_longer(
    cols = all_of(c("PValue", "darP")), names_to = "type", values_to = "p"
  ) %>%
  ggplot(aes(sample = -log(p), colour = type)) +
  stat_qq(distribution = qexp) +
  stat_qq_line(distribution = qexp) +
  facet_wrap(~type) +
  scale_colour_brewer(palette= "Set1")

Summary

lm_all %>%
  lb_reactable(
    pagination = FALSE,
    columns = list(
                dar_mean = colDef(cell = react_format("f")),
                dar_median = colDef(cell = react_format("f")),
                dar_mean_0 = colDef(cell = react_format("f")),
                dar_median_0 = colDef(cell = react_format("f"))
            )
  )
lm_all %>%
  ggplot() +
  geom_abline(aes(intercept = intercept, slope = slope)) +
  scale_x_continuous(limits = c(0, 1)) +
  scale_y_continuous(limits = c(0, 1.3)) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_colour_viridis()

Things to ask Stevie

  • step/AIC
  • qexp for qqplot
  • stats::optim non-finite finite-difference value
  • alpha < 0, e.g. sorl1 trans geno
  • APOE dataset smaller amount of discrete DAR values due to less variation and fixed window
  • subsetting the data for lm(?), needed to be removed for mouse dataset
  • lm didnt look good in apoe4v3_male

Notes

  • cqn
  • std error bars
  • sigmoidal function?
  • or just constrain to 1 and 0.1, because 0.1 will always push things out of significance
  • wilcox ranksum to test for uniformity
  • wilcox.test(res_B\(R122Pfs\)PValue, runif(1000))

scaling the alpha: - problem we have is that we have a mix of distributions, the null and where there is a true difference. - if we don’t scale, we’ll be overcorrecting the p values to the point where we will lose the true signal - use the median or max alpha value